diff --git a/src/layers/normalisation.jl b/src/layers/normalisation.jl
index b59051a4..aa0b04ae 100644
--- a/src/layers/normalisation.jl
+++ b/src/layers/normalisation.jl
@@ -71,7 +71,7 @@ end
     BatchNorm(dims...; λ = identity,
               initβ = zeros, initγ = ones, ϵ = 1e-8, momentum = .1)
 
-Batch Normalization Layer for [`Dense`](@ref) layer.
+Batch Normalization Layer for [`Dense`](@ref) or [`Conv`](@ref) layers.
 
 See [Batch Normalization: Accelerating Deep Network Training by Reducing
 Internal Covariate Shift](https://arxiv.org/pdf/1502.03167.pdf)
@@ -88,6 +88,18 @@ m = Chain(
   BatchNorm(10),
   softmax)
 ```
+Normalization with convolutional layers is handled similarly.
+```julia
+m = Chain(
+  Conv((2,2), 1=>16),
+  BatchNorm(16, λ=relu),
+  x -> maxpool(x, (2,2)),
+  Conv((2,2), 16=>8),
+  BatchNorm(8, λ=relu),
+  x -> maxpool(x, (2,2)),
+  x -> reshape(x, :, size(x, 4)),
+  Dense(288, 10), softmax) |> gpu
+```
 """
 mutable struct BatchNorm{F,V,N}
   λ::F # activation function
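As a sanity check on the `Dense(288, 10)` size in the new docstring example, here is a small sketch (not part of the patch; it assumes a 28×28 single-channel input such as MNIST, and drops the `|> gpu` so it runs on CPU) showing how the two Conv + maxpool stages reduce the input to 6×6×8 = 288 features:

```julia
using Flux

# Shape bookkeeping for a 28×28×1 image (assumed MNIST-style input):
#   28×28×1  → Conv((2,2), 1=>16) → 27×27×16
#            → maxpool (2,2)      → 13×13×16
#            → Conv((2,2), 16=>8) → 12×12×8
#            → maxpool (2,2)      →  6×6×8   = 288 features per image
m = Chain(
  Conv((2,2), 1=>16),
  BatchNorm(16, λ=relu),
  x -> maxpool(x, (2,2)),
  Conv((2,2), 16=>8),
  BatchNorm(8, λ=relu),
  x -> maxpool(x, (2,2)),
  x -> reshape(x, :, size(x, 4)),
  Dense(288, 10), softmax)

x = rand(Float32, 28, 28, 1, 16)  # dummy batch of 16 images
size(m(x))                        # (10, 16): class scores per image
```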