950: added GlobalMaxPool, GlobalMeanPool, and flatten layers r=CarloLucibello a=gartangh



Co-authored-by: Garben Tanghe <garben.tanghe@gmail.com>
Committed by: bors[bot], 2020-03-08 14:27:10 +00:00 (via GitHub)
Commit: d4cf1436df
6 changed files with 92 additions and 15 deletions


@@ -14,10 +14,13 @@ These layers are used to build convolutional neural networks (CNNs).
 ```@docs
 Conv
 MaxPool
+GlobalMaxPool
 MeanPool
+GlobalMeanPool
 DepthwiseConv
 ConvTranspose
 CrossCor
+flatten
 ```

 ## Recurrent Layers


@@ -10,7 +10,8 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd
 export gradient

-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+       GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!


@@ -95,8 +95,9 @@ outdims(l::Conv, isize) =
 Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.

-Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+Data should be stored in WHCN order (width, height, # channels, # batches).
+In other words, a 100×100 RGB image would be a `100×100×3×1` array,
+and a batch of 50 would be a `100×100×3×50` array.

 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
@@ -171,8 +172,9 @@ Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 Note that `out` must be an integer multiple of `in`.

-Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+Data should be stored in WHCN order (width, height, # channels, # batches).
+In other words, a 100×100 RGB image would be a `100×100×3×1` array,
+and a batch of 50 would be a `100×100×3×50` array.

 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
@@ -304,6 +306,56 @@ end
 outdims(l::CrossCor, isize) =
   output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))

+"""
+    GlobalMaxPool()
+
+Global max pooling layer.
+
+Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
+by performing max pooling on the complete (w,h)-shaped feature maps.
+"""
+struct GlobalMaxPool end
+
+function (g::GlobalMaxPool)(x)
+  # Input size
+  x_size = size(x)
+  # Kernel size
+  k = x_size[1:end-2]
+  # Pooling dimensions
+  pdims = PoolDims(x, k)
+
+  return maxpool(x, pdims)
+end
+
+function Base.show(io::IO, g::GlobalMaxPool)
+  print(io, "GlobalMaxPool()")
+end
+
+"""
+    GlobalMeanPool()
+
+Global mean pooling layer.
+
+Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
+by performing mean pooling on the complete (w,h)-shaped feature maps.
+"""
+struct GlobalMeanPool end
+
+function (g::GlobalMeanPool)(x)
+  # Input size
+  x_size = size(x)
+  # Kernel size
+  k = x_size[1:end-2]
+  # Pooling dimensions
+  pdims = PoolDims(x, k)
+
+  return meanpool(x, pdims)
+end
+
+function Base.show(io::IO, g::GlobalMeanPool)
+  print(io, "GlobalMeanPool()")
+end
+
 """
     MaxPool(k)
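To see what the new layers do in practice, here is a small usage sketch (not part of the diff; the network shape is illustrative only). Both layers derive the pooling window from the input itself, so each (w,h) feature map collapses to a single value per channel:

```julia
using Flux

x = rand(Float32, 10, 10, 3, 2)   # WHCN input: two 10×10 RGB samples

size(GlobalMaxPool()(x))          # (1, 1, 3, 2)
size(GlobalMeanPool()(x))         # (1, 1, 3, 2)

# Illustrative head: conv features → global pooling → flatten → Dense.
model = Chain(Conv((3, 3), 3 => 16, relu), GlobalMeanPool(), flatten, Dense(16, 10))
size(model(x))                    # (10, 2)
```

Because the window always spans the full feature map, the `Dense` head's input width depends only on the channel count, not on the spatial size of the input.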


@@ -200,3 +200,13 @@ Returns `1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)`
 [Tversky loss function for image segmentation using 3D fully convolutional deep networks](https://arxiv.org/pdf/1706.05721.pdf)
 """
 tversky_loss(ŷ, y; β = eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
+
+"""
+    flatten(x::AbstractArray)
+
+Transforms (w,h,c,b)-shaped input into (w×h×c,b)-shaped output,
+by linearizing all values for each element in the batch.
+"""
+function flatten(x::AbstractArray)
+  return reshape(x, :, size(x)[end])
+end
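A quick sketch of how the new `flatten` behaves (illustrative, not part of the diff): it collapses all but the last (batch) dimension, which is the usual bridge between convolutional feature maps and a `Dense` head:

```julia
using Flux

x = rand(Float32, 10, 10, 3, 2)   # WHCN: two samples of 10×10×3 feature maps
size(flatten(x))                  # (300, 2) — 10*10*3 = 300 features per sample

# Typical use between conv features and a Dense classifier:
model = Chain(Conv((3, 3), 3 => 8, relu), flatten, Dense(8 * 8 * 8, 10))
size(model(x))                    # (10, 2)
```

Unlike global pooling, `flatten` keeps every spatial value, so the `Dense` layer's input width must match the exact feature-map size.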


@@ -4,6 +4,10 @@ using Flux: gradient
 @testset "Pooling" begin
   x = randn(Float32, 10, 10, 3, 2)
+  gmp = GlobalMaxPool()
+  @test size(gmp(x)) == (1, 1, 3, 2)
+  gmp = GlobalMeanPool()
+  @test size(gmp(x)) == (1, 1, 3, 2)
   mp = MaxPool((2, 2))
   @test mp(x) == maxpool(x, PoolDims(x, 2))
   mp = MeanPool((2, 2))


@@ -1,6 +1,6 @@
 using Test
 using Flux: onehotbatch, mse, crossentropy, logitcrossentropy,
-  σ, binarycrossentropy, logitbinarycrossentropy
+  σ, binarycrossentropy, logitbinarycrossentropy, flatten

 const ϵ = 1e-7
@@ -116,3 +116,10 @@ const ϵ = 1e-7
     end
   end
 end
+
+@testset "helpers" begin
+  @testset "flatten" begin
+    x = randn(Float32, 10, 10, 3, 2)
+    @test size(flatten(x)) == (300, 2)
+  end
+end