From 3e14bd878c0bc2966579b20eea4bf69e52f53b6c Mon Sep 17 00:00:00 2001
From: Garben Tanghe <garben.tanghe@gmail.com>
Date: Mon, 2 Dec 2019 13:31:25 +0100
Subject: [PATCH 1/4] added GlobalMaxPool, GlobalMeanPool, and Flatten layers

---
 docs/src/models/layers.md |  3 ++
 src/Flux.jl               |  3 +-
 src/layers/conv.jl        | 80 ++++++++++++++++++++++++++++++++++++---
 test/layers/conv.jl       |  6 +++
 4 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index b4b745b2..5f12d41a 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -14,10 +14,13 @@ These layers are used to build convolutional neural networks (CNNs).
 ```@docs
 Conv
 MaxPool
+GlobalMaxPool
 MeanPool
+GlobalMeanPool
 DepthwiseConv
 ConvTranspose
 CrossCor
+Flatten
 ```
 
 ## Recurrent Layers
diff --git a/src/Flux.jl b/src/Flux.jl
index 5afa1fc0..725abfa7 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -10,7 +10,8 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd
 
 export gradient
 
-export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
+export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
+       GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, Flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!
 
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index ef167f71..67004b4a 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -95,8 +95,9 @@ outdims(l::Conv, isize) =
 Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
-Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+Data should be stored in WHCN order (width, height, channels, batch size).
+In other words, a single 100×100 RGB image would be a `100×100×3×1` array,
+and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
@@ -171,8 +172,9 @@ Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 Note that `out` must be an integer multiple of `in`.
 
-Data should be stored in WHCN order. In other words, a 100×100 RGB image would
-be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
+Data should be stored in WHCN order (width, height, channels, batch size).
+In other words, a single 100×100 RGB image would be a `100×100×3×1` array,
+and a batch of 50 would be a `100×100×3×50` array.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
@@ -304,6 +306,56 @@ end
 outdims(l::CrossCor, isize) =
   output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
 
+"""
+    GlobalMaxPool()
+
+Global max pooling layer.
+
+Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
+by performing max pooling on the complete (w,h)-shaped feature maps.
+"""
+struct GlobalMaxPool end
+
+function (g::GlobalMaxPool)(x)
+  # Every dimension except the trailing two (channels and batch) is
+  # spatial; a window of that size spans one complete feature map.
+  window = size(x)[1:end-2]
+
+  # Pooling over that window reduces each feature map to its maximum.
+  dims = PoolDims(x, window)
+
+  return maxpool(x, dims)
+end
+
+# Show the layer as the call that constructs it.
+Base.show(io::IO, g::GlobalMaxPool) =
+  print(io, "GlobalMaxPool()")
+
+"""
+    GlobalMeanPool()
+
+Global mean pooling layer.
+
+Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output,
+by performing mean pooling on the complete (w,h)-shaped feature maps.
+"""
+struct GlobalMeanPool end
+
+function (g::GlobalMeanPool)(x)
+  # Every dimension except the trailing two (channels and batch) is
+  # spatial; a window of that size spans one complete feature map.
+  window = size(x)[1:end-2]
+
+  # Pooling over that window reduces each feature map to its mean.
+  dims = PoolDims(x, window)
+
+  return meanpool(x, dims)
+end
+
+# Show the layer as the call that constructs it.
+Base.show(io::IO, g::GlobalMeanPool) =
+  print(io, "GlobalMeanPool()")
+
 """
     MaxPool(k)
 
@@ -363,4 +415,22 @@ function Base.show(io::IO, m::MeanPool)
   print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")")
 end
 
-outdims(l::MeanPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad))
\ No newline at end of file
+outdims(l::MeanPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad))
+
+"""
+    Flatten()
+
+Flattening layer.
+
+Transforms (w,h,c,b)-shaped input into (w*h*c,b)-shaped output,
+by linearizing all values for each element in the batch.
+"""
+struct Flatten end
+
+function (f::Flatten)(x)
+  return reshape(x, :, size(x)[end])
+end
+
+function Base.show(io::IO, f::Flatten)
+  print(io, "Flatten()")
+end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 03a0d1a4..60e1898d 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -4,10 +4,16 @@ using Flux: gradient
 
 @testset "Pooling" begin
   x = randn(Float32, 10, 10, 3, 2)
+  gmp = GlobalMaxPool()
+  @test gmp(x) == maximum(x, dims = (1, 2))  # also pins the (1, 1, 3, 2) shape
+  gap = GlobalMeanPool()
+  @test gap(x) ≈ sum(x, dims = (1, 2)) ./ (10 * 10)
   mp = MaxPool((2, 2))
   @test mp(x) == maxpool(x, PoolDims(x, 2))
   mp = MeanPool((2, 2))
   @test mp(x) == meanpool(x, PoolDims(x, 2))
+  f = Flatten()
+  @test size(f(x)) == (300, 2)
 end
 
 @testset "CNN" begin

From 82e16a5b291fc115e485bd2fcb1cea731c70c0e4 Mon Sep 17 00:00:00 2001
From: Garben Tanghe <garben.tanghe@gmail.com>
Date: Thu, 5 Dec 2019 14:16:12 +0100
Subject: [PATCH 2/4] split up Flatten layer to use the flatten function

---
 src/layers/conv.jl       | 16 ++++++++++++----
 src/layers/stateless.jl  | 26 ++++++++++++++++++--------
 test/layers/stateless.jl |  9 ++++++++-
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 67004b4a..faca0895 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -425,12 +425,20 @@ Flattening layer.
 Transforms (w,h,c,b)-shaped input into (w*h*c,b)-shaped output,
 by linearizing all values for each element in the batch.
 """
-struct Flatten end
+struct Flatten{F}
+  σ::F
+  function Flatten(σ::F = identity) where {F}
+    return new{F}(σ)
+  end
+end
 
-function (f::Flatten)(x)
-  return reshape(x, :, size(x)[end])
+function (f::Flatten)(x::AbstractArray)
+  σ = f.σ
+  σ(flatten(x))
 end
 
 function Base.show(io::IO, f::Flatten)
-  print(io, "Flatten()")
+  print(io, "Flatten(")
+  f.σ == identity || print(io, f.σ)
+  print(io, ")")
 end
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 2fd98815..a9e6c6e5 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -2,7 +2,7 @@
 """
     mae(ŷ, y)
 
-Return the mean of absolute error `sum(abs.(ŷ .- y)) / length(y)` 
+Return the mean of absolute error `sum(abs.(ŷ .- y)) / length(y)`
 """
 mae(ŷ, y) = sum(abs.(ŷ .- y)) * 1 // length(y)
 
@@ -10,7 +10,7 @@ mae(ŷ, y) = sum(abs.(ŷ .- y)) * 1 // length(y)
 """
     mse(ŷ, y)
 
-Return the mean squared error `sum((ŷ .- y).^2) / length(y)`. 
+Return the mean squared error `sum((ŷ .- y).^2) / length(y)`.
 """
 mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
 
@@ -19,7 +19,7 @@ mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
     msle(ŷ, y; ϵ=eps(eltype(ŷ)))
 
 Returns the mean of the squared logarithmic errors `sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) / length(y)`.
-The `ϵ` term provides numerical stability. 
+The `ϵ` term provides numerical stability.
 
 This error penalizes an under-predicted estimate greater than an over-predicted estimate.
 """
@@ -60,7 +60,7 @@ end
 """
     crossentropy(ŷ, y; weight=1)
 
-Return the crossentropy computed as `-sum(y .* log.(ŷ) .* weight) / size(y, 2)`. 
+Return the crossentropy computed as `-sum(y .* log.(ŷ) .* weight) / size(y, 2)`.
 
 See also [`logitcrossentropy`](@ref), [`binarycrossentropy`](@ref).
 """
@@ -69,7 +69,7 @@ crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _cros
 """
     logitcrossentropy(ŷ, y; weight=1)
 
-Return the crossentropy computed after a [softmax](@ref) operation: 
+Return the crossentropy computed after a [softmax](@ref) operation:
 
   -sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2)
 
@@ -97,7 +97,7 @@ CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1
 `logitbinarycrossentropy(ŷ, y)` is mathematically equivalent to `binarycrossentropy(σ(ŷ), y)`
 but it is more numerically stable.
 
-See also [`binarycrossentropy`](@ref), [`sigmoid`](@ref), [`logsigmoid`](@ref).  
+See also [`binarycrossentropy`](@ref), [`sigmoid`](@ref), [`logsigmoid`](@ref).
 """
 logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
 
@@ -162,7 +162,7 @@ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y,2)
 """
     hinge(ŷ, y)
 
-Measures the loss given the prediction `ŷ` and true labels `y` (containing 1 or -1). 
+Measures the loss given the prediction `ŷ` and true labels `y` (containing 1 or -1).
 Returns `sum((max.(0, 1 .- ŷ .* y))) / size(y, 2)`
 
 [Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss)
@@ -193,10 +193,20 @@ dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth
 """
     tversky_loss(ŷ, y; β=0.7)
 
-Used with imbalanced data to give more weightage to False negatives. 
+Used with imbalanced data to give more weightage to False negatives.
 Larger β weigh recall higher than precision (by placing more emphasis on false negatives)
 Returns `1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)`
 
 [Tversky loss function for image segmentation using 3D fully convolutional deep networks](https://arxiv.org/pdf/1706.05721.pdf)
 """
 tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
+
+"""
+    flatten(x::AbstractArray)
+
+Transforms (w,h,c,b)-shaped input into (w*h*c,b)-shaped output,
+by linearizing all values for each element in the batch.
+"""
+function flatten(x::AbstractArray)
+  return reshape(x, :, last(size(x)))
+end
diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl
index ce940bf9..ebcd815c 100644
--- a/test/layers/stateless.jl
+++ b/test/layers/stateless.jl
@@ -1,6 +1,6 @@
 using Test
 using Flux: onehotbatch, mse, crossentropy, logitcrossentropy,
-            σ, binarycrossentropy, logitbinarycrossentropy
+            σ, binarycrossentropy, logitbinarycrossentropy, flatten
 
 const ϵ = 1e-7
 
@@ -116,3 +116,10 @@ const ϵ = 1e-7
     end
   end
 end
+
+@testset "helpers" begin
+  @testset "flatten" begin
+    x = randn(Float32, 10, 10, 3, 2)
+    @test flatten(x) == reshape(x, 300, 2)  # pins shape and element order
+  end
+end

From 746e3310f18485c0c30c9975c71c88d53d00fe26 Mon Sep 17 00:00:00 2001
From: Garben Tanghe <garben.tanghe@gmail.com>
Date: Thu, 27 Feb 2020 12:44:17 +0100
Subject: [PATCH 3/4] removed Flatten struct

updated documentation
---
 docs/src/models/layers.md |  2 +-
 src/Flux.jl               |  2 +-
 src/layers/conv.jl        | 26 --------------------------
 test/layers/conv.jl       |  2 --
 4 files changed, 2 insertions(+), 30 deletions(-)

diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index 5f12d41a..2b5c1591 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -20,7 +20,7 @@ GlobalMeanPool
 DepthwiseConv
 ConvTranspose
 CrossCor
-Flatten
+flatten
 ```
 
 ## Recurrent Layers
diff --git a/src/Flux.jl b/src/Flux.jl
index 725abfa7..f973dc4c 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -11,7 +11,7 @@ using Zygote: Params, @adjoint, gradient, pullback, @nograd
 export gradient
 
 export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose,
-       GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, Flatten,
+       GlobalMaxPool, GlobalMeanPool, MaxPool, MeanPool, flatten,
        DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm, InstanceNorm, GroupNorm,
        SkipConnection, params, fmap, cpu, gpu, f32, f64, testmode!, trainmode!
 
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index faca0895..742091a6 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -416,29 +416,3 @@ function Base.show(io::IO, m::MeanPool)
 end
 
 outdims(l::MeanPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad))
-
-"""
-    Flatten()
-
-Flattening layer.
-
-Transforms (w,h,c,b)-shaped input into (w*h*c,b)-shaped output,
-by linearizing all values for each element in the batch.
-"""
-struct Flatten{F}
-  σ::F
-  function Flatten(σ::F = identity) where {F}
-    return new{F}(σ)
-  end
-end
-
-function (f::Flatten)(x::AbstractArray)
-  σ = f.σ
-  σ(flatten(x))
-end
-
-function Base.show(io::IO, f::Flatten)
-  print(io, "Flatten(")
-  f.σ == identity || print(io, f.σ)
-  print(io, ")")
-end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 60e1898d..e7b3963d 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -12,8 +12,6 @@ using Flux: gradient
   @test mp(x) == maxpool(x, PoolDims(x, 2))
   mp = MeanPool((2, 2))
   @test mp(x) == meanpool(x, PoolDims(x, 2))
-  f = Flatten()
-  @test size(f(x)) == (300, 2)
 end
 
 @testset "CNN" begin

From fc3af681ec45f9fb8ea405a447908c1b8bc9bbed Mon Sep 17 00:00:00 2001
From: Garben Tanghe <garben.tanghe@gmail.com>
Date: Sun, 8 Mar 2020 14:17:00 +0100
Subject: [PATCH 4/4] updated documentation

---
 src/layers/stateless.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index a9e6c6e5..eebbbe98 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -204,7 +204,7 @@ tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .*
 """
     flatten(x::AbstractArray)
 
-Transforms (w,h,c,b)-shaped input into (w*h*c,b)-shaped output,
+Transforms `(w,h,c,b)`-shaped input into `(w*h*c, b)`-shaped output,
 by linearizing all values for each element in the batch.
 """
 function flatten(x::AbstractArray)