From ecc9ce9d64764081c099c0dbf4db94b86672c3d7 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Fri, 6 Sep 2019 16:34:19 +0530
Subject: [PATCH] Gradient on AlphaDropout now working

---
 src/layers/normalise.jl | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index f402d51f..48859608 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -57,24 +57,19 @@ mutable struct AlphaDropout{F}
   end
 end
 
-alphadropout(x, p) = x
-
-_alphadropout_kernel(x, noise, p, α1) = noise > (1 - p) ? x : α1
-
-@adjoint function alphadropout(x, p)
+function (a::AlphaDropout)(x)
+  istraining() || return x
   λ = eltype(x)(1.0507009873554804934193349852946)
   α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
-  x .= _alphadropout_kernel.(x, noise, p, α1)
-  A = (p + p * (1 - p) * α1 ^ 2) ^ 0.5
-  B = -A * α1 * (1 - p)
-  x = @. A * x + B
-  return x, Δ -> (Δ .* A.* noise, nothing)
+  x = @. x*(noise > (1 - a.p)) + α1 * (noise < (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α1 ^ 2)^0.5
+  B = -A * α1 * (1 - a.p)
+  x = @. A * x + B
+  return x
 end
 
-(a::AlphaDropout)(x) = alphadropout(x, a.p)
-
 """
     LayerNorm(h::Integer)
 
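
Note on the change: the old code paired a no-op forward definition
`alphadropout(x, p) = x` with a hand-written `@adjoint` whose pullback
scaled `Δ` by the raw Gaussian noise values rather than by the binary keep
mask times `A`, which appears to be why gradients came out wrong. The new
version makes the layer a plain callable with no custom adjoint and no
in-place mutation, so Zygote can derive the gradient on its own.

A minimal sketch of the behaviour this fixes, assuming Flux at this commit
exports `AlphaDropout` and re-exports Zygote's `gradient` (the variable
names below are illustrative only, not part of the patch):

    using Flux

    a = AlphaDropout(0.5)
    x = randn(Float32, 10)

    # Outside of a gradient call, istraining() is false, so the layer
    # returns its input unchanged (identity at test time).
    a(x) == x

    # Inside gradient(), istraining() is true: dropout is applied and the
    # gradient flows through the masking and the A*x + B rescale. Before
    # this patch, the hand-written pullback gave incorrect gradients here.
    gs = gradient(x -> sum(a(x)), x)

The `A` and `B` constants are the affine correction from the SELU paper
(Klambauer et al., 2017), chosen so that alpha dropout approximately
preserves the zero mean and unit variance that SELU activations converge
to.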