563: noise shape for dropout r=MikeInnes a=chengchingwen

I add the noise shape for dropout, similar to the `noise_shape` argument in [`tf.nn.dropout`](https://www.tensorflow.org/api_docs/python/tf/nn/dropout)

Co-authored-by: chengchingwen <adgjl5645@hotmail.com>
Co-authored-by: Peter <adgjl5645@hotmail.com>
This commit is contained in:
bors[bot] 2019-05-13 17:16:10 +00:00
commit 68ba6e4e2f
3 changed files with 38 additions and 9 deletions

View File

@@ -3,6 +3,7 @@
# v0.8.0 # v0.8.0
* [Dropout now has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563)
* New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311). * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311).
* New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647) * New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647)
* Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption. * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption.

View File

@@ -13,32 +13,50 @@ end
_testmode!(m, test) = nothing _testmode!(m, test) = nothing
""" """
Dropout(p) Dropout(p, dims = :)
A Dropout layer. For each input, either sets that input to `0` (with probability A Dropout layer. For each input, either sets that input to `0` (with probability
`p`) or scales it by `1/(1-p)`. This is used as a regularisation, i.e. it `p`) or scales it by `1/(1-p)`. The `dims` argument is to specified the unbroadcasted
reduces overfitting during training. dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is
used as a regularisation, i.e. it reduces overfitting during training. see also [`dropout`](@ref).
Does nothing to the input once in [`testmode!`](@ref). Does nothing to the input once in [`testmode!`](@ref).
""" """
mutable struct Dropout{F} mutable struct Dropout{F}
p::F p::F
dims::Union{Colon, Int, NTuple{N, Int} where N}
active::Bool active::Bool
end end
function Dropout(p) function Dropout(p; dims = :)
@assert 0 ≤ p ≤ 1 @assert 0 ≤ p ≤ 1
Dropout{typeof(p)}(p, true) Dropout{typeof(p)}(p, dims, true)
end end
_dropout_shape(s, ::Colon) = size(s)
_dropout_shape(s, dims) = tuple((i ∈ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)
_dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)
"""
dropout(x, p; dims = :)
The dropout function. For each input, either sets that input to `0` (with probability
`p`) or scales it by `1/(1-p)`. The `dims` argument is to specified the unbroadcasted
dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is
used as a regularisation, i.e. it reduces overfitting during training.
"""
function dropout(x, p; dims = :)
y = similar(x, _dropout_shape(x, dims))
rand!(y)
y .= _dropout_kernel.(y, p, 1 - p)
return x .* y
end
function (a::Dropout)(x) function (a::Dropout)(x)
a.active || return x a.active || return x
y = similar(x) return dropout(x, a.p; dims = a.dims)
rand!(y)
y .= _dropout_kernel.(y, a.p, 1 - a.p)
return x .* y
end end
_testmode!(a::Dropout, test) = (a.active = !test) _testmode!(a::Dropout, test) = (a.active = !test)

View File

@@ -26,6 +26,16 @@ using Flux.Tracker: data
testmode!(m) testmode!(m)
y = m(x) y = m(x)
@test count(a->a == 0, y) == 0 @test count(a->a == 0, y) == 0
x = rand(100, 50)
m = Dropout(0.5, dims = 2)
y = m(x)
c = map(i->count(a->a==0, @view y[i, :]), 1:100)
@test minimum(c) == maximum(c)
m = Dropout(0.5, dims = 1)
y = m(x)
c = map(i->count(a->a==0, @view y[:, i]), 1:50)
@test minimum(c) == maximum(c)
end end
@testset "BatchNorm" begin @testset "BatchNorm" begin