diff --git a/NEWS.md b/NEWS.md
index 512f2506..e15ece4c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 # v0.8.0
 
+* [Dropout now has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563)
 * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311).
 * New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647)
 * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption.
diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 7c11d411..4e82c863 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -13,32 +13,50 @@ end
 _testmode!(m, test) = nothing
 
 """
-    Dropout(p)
+    Dropout(p; dims = :)
 
 A Dropout layer. For each input, either sets that input to `0` (with probability
-`p`) or scales it by `1/(1-p)`. This is used as a regularisation, i.e. it
-reduces overfitting during training.
+`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcast
+dimensions, i.e. `dims=1` applies dropout along columns and `dims=2` along rows.
+This is used as a regularisation, i.e. it reduces overfitting during training.
+
+See also [`dropout`](@ref).
 
 Does nothing to the input once in [`testmode!`](@ref).
 """
 mutable struct Dropout{F}
   p::F
+  dims::Union{Colon, Int, NTuple{N, Int} where N}
   active::Bool
 end
 
-function Dropout(p)
+function Dropout(p; dims = :)
   @assert 0 ≤ p ≤ 1
-  Dropout{typeof(p)}(p, true)
+  Dropout{typeof(p)}(p, dims, true)
 end
 
+_dropout_shape(s, ::Colon) = size(s)
+_dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)
+
 _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)
 
+"""
+    dropout(x, p; dims = :)
+
+The dropout function. For each input, either sets that input to `0` (with probability
+`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcast
+dimensions, i.e. `dims=1` applies dropout along columns and `dims=2` along rows.
+This is used as a regularisation, i.e. it reduces overfitting during training.
+"""
+function dropout(x, p; dims = :)
+  y = similar(x, _dropout_shape(x, dims))
+  rand!(y)
+  y .= _dropout_kernel.(y, p, 1 - p)
+  return x .* y
+end
+
 function (a::Dropout)(x)
   a.active || return x
-  y = similar(x)
-  rand!(y)
-  y .= _dropout_kernel.(y, a.p, 1 - a.p)
-  return x .* y
+  return dropout(x, a.p; dims = a.dims)
 end
 
 _testmode!(a::Dropout, test) = (a.active = !test)
diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl
index bfb46cf6..72c2d52b 100644
--- a/test/layers/normalisation.jl
+++ b/test/layers/normalisation.jl
@@ -26,6 +26,16 @@ using Flux.Tracker: data
   testmode!(m)
   y = m(x)
   @test count(a->a == 0, y) == 0
+
+  x = rand(100, 50)
+  m = Dropout(0.5, dims = 2)
+  y = m(x)
+  c = map(i->count(a->a==0, @view y[i, :]), 1:100)
+  @test minimum(c) == maximum(c)
+  m = Dropout(0.5, dims = 1)
+  y = m(x)
+  c = map(i->count(a->a==0, @view y[:, i]), 1:50)
+  @test minimum(c) == maximum(c)
 end
 
 @testset "BatchNorm" begin
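
For reference, a minimal usage sketch of the new `dims` keyword (not part of the diff; it assumes a Flux build with this patch applied and a freshly constructed layer in its default training mode):

```julia
using Flux

x = ones(Float32, 4, 3)

# dims = 1: the mask has size (4, 1) and is broadcast across the columns,
# so each row of `y1` is either all zeros or all 1/(1 - p) = 2.0f0.
y1 = Dropout(0.5, dims = 1)(x)

# dims = 2: the mask has size (1, 3) and is broadcast across the rows,
# so each column of `y2` is either all zeros or all 2.0f0.
y2 = Dropout(0.5, dims = 2)(x)

# The functional form added in this patch behaves the same way:
y3 = Flux.dropout(x, 0.5, dims = 2)
```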