document regularisation, fixes #160
parent 0e0057b0c4
commit 0b3c02fe8d

@@ -10,6 +10,7 @@ makedocs(modules=[Flux, NNlib],
          "Building Models" =>
            ["Basics" => "models/basics.md",
             "Recurrence" => "models/recurrence.md",
+            "Regularisation" => "models/regularisation.md",
             "Model Reference" => "models/layers.md"],
          "Training Models" =>
            ["Optimisers" => "training/optimisers.md",

docs/src/models/regularisation.md (new file, 47 lines)
@@ -0,0 +1,47 @@
# Regularisation

Applying regularisation to model parameters is straightforward. We just need to
apply an appropriate regulariser, such as `norm`, to each model parameter and
add the result to the overall loss.

For example, say we have a simple regression.

```julia
m = Dense(10, 5)
loss(x, y) = crossentropy(softmax(m(x)), y)
```

We can regularise this by taking the (L2) norm of the parameters, `m.W` and `m.b`.

```julia
penalty() = norm(m.W) + norm(m.b)
loss(x, y) = crossentropy(softmax(m(x)), y) + penalty()
```
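
A common follow-on, not in the original text, is to scale the penalty by a coefficient so the regularisation strength can be tuned; the `λ = 0.01` below is an arbitrary illustrative value.

```julia
λ = 0.01  # illustrative weight-decay coefficient, not from the original docs
loss(x, y) = crossentropy(softmax(m(x)), y) + λ*penalty()
```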

When working with layers, Flux provides the `params` function to grab all
parameters at once. We can easily penalise everything with `sum(norm, params)`.

```julia
julia> params(m)
2-element Array{Any,1}:
 param([0.355408 0.533092; … 0.430459 0.171498])
 param([0.0, 0.0, 0.0, 0.0, 0.0])

julia> sum(norm, params(m))
26.01749952921026 (tracked)
```
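
Since this commit's tracker changes (see the diff further down) give `norm` a `p` argument, a 1-norm penalty can be sketched the same way; this variant is illustrative rather than part of the documentation.

```julia
# Illustrative L1 variant; norm(x, 1) relies on the p-norm method added below.
l1_penalty() = sum(x -> norm(x, 1), params(m))
```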

Here's a larger example with a multi-layer perceptron.

```julia
m = Chain(
  Dense(28^2, 128, relu),
  Dense(128, 32, relu),
  Dense(32, 10), softmax)

ps = params(m)

loss(x, y) = crossentropy(m(x), y) + sum(norm, ps)

loss(rand(28^2), rand(10))
```
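
As a hypothetical usage sketch (not part of this commit), the regularised `loss` plugs into a training loop in the usual way; the `SGD`/`train!` calls below assume the training API documented alongside this page in the same era, and the one-element `data` set is made up.

```julia
data = [(rand(28^2), rand(10))]  # made-up toy (input, target) pair
opt  = SGD(ps, 0.1)              # assumed optimiser constructor from the training docs
Flux.train!(loss, data, opt)     # each step minimises crossentropy plus the penalty
```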

@@ -113,6 +113,7 @@ back(::typeof(reshape), Δ, xs::TrackedArray, _...) =
 
 Base.sum(xs::TrackedArray, dim) = track(sum, xs, dim)
 Base.sum(xs::TrackedArray) = track(sum, xs)
+Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs))
 
 back(::typeof(sum), Δ, xs::TrackedArray, dim...) = back(xs, similar(xs.data) .= Δ)
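
A quick sketch, not part of the commit, of what the new `sum(f, xs)` method enables on tracked arrays:

```julia
using Flux  # `param` creates a tracked array

x = param([1.0 -2.0; 3.0 -4.0])
sum(abs, x)  # applies `abs` elementwise, then sums: a tracked 10.0
```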

@@ -137,6 +138,11 @@ Base.std(x::TrackedArray; mean = Base.mean(x)) =
 Base.std(x::TrackedArray, dim; mean = Base.mean(x, dim)) =
   sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1))
 
+Base.norm(x::TrackedArray, p::Real = 2) =
+  p == 1 ? sum(abs.(x)) :
+  p == 2 ? sqrt(sum(abs2.(x))) :
+    error("$p-norm not supported")
+
 back(::typeof(mean), Δ, xs::TrackedArray) = back(xs, similar(xs.data) .= Δ ./ length(xs.data))
 back(::typeof(mean), Δ, xs::TrackedArray, region) =
   back(xs, similar(xs.data) .= Δ ./ prod(size(xs.data, region...)))
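
Similarly, an illustrative sketch of the new `norm` method for tracked arrays (the example values are assumptions, not from the commit):

```julia
using Flux

W = param([3.0, 4.0])
norm(W)     # 2-norm: sqrt(sum(abs2.(W))) == 5.0 (tracked)
norm(W, 1)  # 1-norm: sum(abs.(W)) == 7.0 (tracked)
# norm(W, Inf) would throw "Inf-norm not supported"
```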