diff --git a/src/layers/normalisation.jl b/src/layers/normalisation.jl
index b59051a4..aa0b04ae 100644
--- a/src/layers/normalisation.jl
+++ b/src/layers/normalisation.jl
@@ -71,7 +71,7 @@ end
     BatchNorm(dims...; λ = identity,
               initβ = zeros, initγ = ones, ϵ = 1e-8, momentum = .1)
 
-Batch Normalization Layer for [`Dense`](@ref) layer.
+Batch Normalization Layer for [`Dense`](@ref) or [`Conv`](@ref) layers.
 
 See [Batch Normalization: Accelerating Deep Network Training by Reducing
 Internal Covariate Shift](https://arxiv.org/pdf/1502.03167.pdf)
@@ -88,6 +88,18 @@ m = Chain(
   BatchNorm(10),
   softmax)
 ```
+Normalization with convolutional layers is handled similarly.
+```julia
+m = Chain(
+  Conv((2,2), 1=>16),
+  BatchNorm(16, λ=relu),
+  x -> maxpool(x, (2,2)),
+  Conv((2,2), 16=>8),
+  BatchNorm(8, λ=relu),
+  x -> maxpool(x, (2,2)),
+  x -> reshape(x, :, size(x, 4)),
+  Dense(288, 10), softmax) |> gpu
+```
 """
 mutable struct BatchNorm{F,V,N}
   λ::F # activation function
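As a sanity check on the `Dense(288, 10)` size in the new docstring example, here is a small sketch (not part of the patch; it assumes a 28×28 single-channel input such as MNIST, and drops the `|> gpu` so it runs on CPU) showing how the two Conv + maxpool stages reduce the input to 6×6×8 = 288 features:

```julia
using Flux

# Shape bookkeeping for a 28×28×1 image (assumed MNIST-style input):
#   28×28×1  → Conv((2,2), 1=>16) → 27×27×16
#            → maxpool (2,2)      → 13×13×16
#            → Conv((2,2), 16=>8) → 12×12×8
#            → maxpool (2,2)      →  6×6×8   = 288 features per image
m = Chain(
  Conv((2,2), 1=>16),
  BatchNorm(16, λ=relu),
  x -> maxpool(x, (2,2)),
  Conv((2,2), 16=>8),
  BatchNorm(8, λ=relu),
  x -> maxpool(x, (2,2)),
  x -> reshape(x, :, size(x, 4)),
  Dense(288, 10), softmax)

x = rand(Float32, 28, 28, 1, 16)  # dummy batch of 16 images
size(m(x))                        # (10, 16): class scores per image
```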