diff --git a/NEWS.md b/NEWS.md
index 4023c7f2..460a9e5b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# v0.10.5
+* Add option for [same padding](https://github.com/FluxML/Flux.jl/pull/901) to conv and pooling layers by setting `pad=SamePad()`.
+
 # v0.10.0
 * The default AD engine has switched from [Tracker to Zygote.jl](https://github.com/FluxML/Flux.jl/pull/669)
   - The dependency on Tracker.jl has been removed.
diff --git a/src/Flux.jl b/src/Flux.jl
index fb52e859..95134e80 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -11,7 +11,7 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd
 
 export gradient
 
-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, SamePad, Conv, CrossCor, ConvTranspose,
        GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten, DepthwiseConv,
        Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 60666aa2..f8830fee 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -7,6 +7,28 @@ _convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
+
+"""
+    SamePad
+
+Padding for convolutional layers will be calculated so that outputsize == inputsize when stride == 1.
+
+For stride > 1, the output size depends on the type of convolution layer.
+"""
+struct SamePad end
+
+calc_padding(pad, k::NTuple{N,T}, dilation, stride) where {T,N} = expand(Val(2*N), pad)
+function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
+  # Ref: "A guide to convolution arithmetic for deep learning" https://arxiv.org/pdf/1603.07285
+
+  # Effective kernel size, including dilation
+  k_eff = @. k + (k - 1) * (dilation - 1)
+  # How much total padding needs to be applied?
+  pad_amt = @. k_eff - 1
+  # In case the amount of padding is odd, we need to apply different amounts to each side.
+  return Tuple(mapfoldl(i -> [ceil(Int, i/2), floor(Int, i/2)], vcat, pad_amt))
+end
+
 """
     Conv(size, in => out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)
 
@@ -18,6 +40,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
+Use `pad=SamePad()` to apply padding so that outputsize == ceil(inputsize / stride).
+
 # Examples
 
 Apply a `Conv` layer to a 1-channel input using a 2×2 window size, giving us a
@@ -41,8 +65,8 @@ end
 
 function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
-  pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return Conv(σ, w, b, stride, pad, dilation)
 end
@@ -99,6 +123,8 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
+
+Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
""" struct ConvTranspose{N,M,F,A,V} σ::F @@ -112,8 +138,8 @@ end function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) - pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + pad = calc_padding(pad, size(w)[1:N-2], dilation, stride) return ConvTranspose(σ, w, b, stride, pad, dilation) end @@ -174,6 +200,8 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. + +Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride. """ struct DepthwiseConv{N,M,F,A,V} σ::F @@ -187,8 +215,8 @@ end function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) - pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + pad = calc_padding(pad, size(w)[1:N-2], dilation, stride) return DepthwiseConv(σ, w, b, stride, pad, dilation) end @@ -240,6 +268,8 @@ Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. +Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride. + # Examples Apply a `CrossCor` layer to a 1-channel input using a 2×2 window size, giving us a @@ -263,8 +293,8 @@ end function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) - pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + pad = calc_padding(pad, size(w)[1:N-2], dilation, stride) return CrossCor(σ, w, b, stride, pad, dilation) end @@ -358,6 +388,9 @@ end MaxPool(k; pad = 0, stride = k) Max pooling layer. `k` is the size of the window for each dimension of the input. + +Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride. +======= """ struct MaxPool{N,M} k::NTuple{N,Int} @@ -367,8 +400,7 @@ end function MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N stride = expand(Val(N), stride) - pad = expand(Val(2*N), pad) - + pad = calc_padding(pad, k, 1, stride) return MaxPool(k, pad, stride) end @@ -387,6 +419,8 @@ outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l. MeanPool(k; pad = 0, stride = k) Mean pooling layer. `k` is the size of the window for each dimension of the input. + +Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride. 
""" struct MeanPool{N,M} k::NTuple{N,Int} @@ -396,7 +430,7 @@ end function MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N stride = expand(Val(N), stride) - pad = expand(Val(2*N), pad) + pad = calc_padding(pad, k, 1, stride) return MeanPool(k, pad, stride) end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index e7b3963d..97355b18 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -162,4 +162,28 @@ end @test Flux.outdims(m, (5, 5)) == (4, 4) m = MeanPool((2, 2); stride = 2, pad = 3) @test Flux.outdims(m, (5, 5)) == (5, 5) -end \ No newline at end of file +end + +@testset "$ltype SamePad kernelsize $k" for ltype in (Conv, ConvTranspose, DepthwiseConv, CrossCor), k in ( (1,), (2,), (3,), (4,5), (6,7,8)) + data = ones(Float32, (k .+ 3)..., 1,1) + l = ltype(k, 1=>1, pad=SamePad()) + @test size(l(data)) == size(data) + + l = ltype(k, 1=>1, pad=SamePad(), dilation = k .÷ 2) + @test size(l(data)) == size(data) + + stride = 3 + l = ltype(k, 1=>1, pad=SamePad(), stride = stride) + if ltype == ConvTranspose + @test size(l(data))[1:end-2] == stride .* size(data)[1:end-2] .- stride .+ 1 + else + @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ stride) + end +end + +@testset "$ltype SamePad windowsize $k" for ltype in (MeanPool, MaxPool), k in ( (1,), (2,), (3,), (4,5), (6,7,8)) + data = ones(Float32, (k .+ 3)..., 1,1) + + l = ltype(k, pad=SamePad()) + @test size(l(data))[1:end-2] == ceil.(Int, size(data)[1:end-2] ./ k) +end