diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 7d1d4d0a..b4d3a035 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -2,6 +2,19 @@ istraining() = false
 
 @adjoint istraining() = true, _ -> nothing
 
+_dropout_shape(s, ::Colon) = size(s)
+_dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)
+
+_dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)
+
+dropout(x, p; dims = :) = x
+
+@adjoint function dropout(x, p; dims = :)
+  y = rand!(similar(x, _dropout_shape(x, dims)))
+  y .= _dropout_kernel.(y, p, 1 - p)
+  return x .* y, Δ -> (Δ .* y, nothing)
+end
+
 """
     Dropout(p, dims = :)
 
@@ -12,33 +25,17 @@ A Dropout layer. For each input, either sets that input to `0` (with probability
 
 Does nothing to the input once in [`testmode!`](@ref).
 """
-mutable struct Dropout{F}
+mutable struct Dropout{F,D}
   p::F
-  dims::Union{Colon, Int, NTuple{N, Int} where N}
+  dims::D
 end
 
 function Dropout(p; dims = :)
   @assert 0 ≤ p ≤ 1
-  Dropout{typeof(p)}(p, dims)
+  Dropout{typeof(p),typeof(dims)}(p, dims)
 end
 
-_dropout_shape(s, ::Colon) = size(s)
-_dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...)
-
-_dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0)
-
-function dropout(x, p; dims = :)
-  istraining() || return x
-  y = similar(x, _dropout_shape(x, dims))
-  rand!(y)
-  y .= _dropout_kernel.(y, p, 1 - p)
-  return x .* y
-end
-
-function (a::Dropout)(x)
-  istraining() || return x
-  return dropout(x, a.p; dims = a.dims)
-end
+(a::Dropout)(x) = dropout(x, a.p; dims = a.dims)
 
 """
     AlphaDropout(p)
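
Reviewer note: a minimal sketch of the behavioural change, not part of the diff, assuming the Flux/Zygote versions this branch targets. The primal method `dropout(x, p; dims = :) = x` is now a no-op, and the `@adjoint` method samples and applies the mask only when Zygote differentiates through the call; "training mode" on this path therefore means "being differentiated", which is what replaces the old `istraining()` guards:

```julia
using Flux
using Flux: dropout  # lowercase `dropout` may not be exported at this version

x = ones(Float32, 8)

# Outside differentiation the primal `dropout(x, p; dims = :) = x` applies,
# so the call is an identity.
@assert dropout(x, 0.5f0) == x

# Inside `gradient`, Zygote dispatches to the `@adjoint` method: one mask is
# sampled, the forward value is `x .* mask`, and the pullback reuses the same
# mask, so each gradient entry is 0 (dropped) or 1 / (1 - p) == 2 (kept, rescaled).
g, = gradient(x -> sum(dropout(x, 0.5f0)), x)
@assert all(v -> v == 0 || v == 2, g)
```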
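
The `dims` keyword is handled by `_dropout_shape` (moved above, otherwise unchanged): dimensions not listed in `dims` collapse to size 1 in the mask, so the broadcast in `x .* y` keeps or drops whole slices at once. A quick illustration, with the caveat that `_dropout_shape` is an internal helper imported here purely for demonstration:

```julia
using Flux: _dropout_shape  # internal helper, not public API

x = rand(Float32, 2, 3, 4)

_dropout_shape(x, :)  # (2, 3, 4): an independent mask entry for every element
_dropout_shape(x, 3)  # (1, 1, 4): the mask varies only along dimension 3, so
                      # each 2×3 slice is dropped or rescaled as a whole
```

This is what lets e.g. `Dropout(p; dims = 3)` zero out entire channels of a `W×H×C` activation rather than individual elements.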