Gradient on AlphaDropout now working

thebhatman 2019-09-06 16:34:19 +05:30
parent 4ca320444e
commit ecc9ce9d64


@@ -57,24 +57,19 @@ mutable struct AlphaDropout{F}
   end
 end
 
-alphadropout(x, p) = x
-
-_alphadropout_kernel(x, noise, p, α1) = noise > (1 - p) ? x : α1
-
-@adjoint function alphadropout(x, p)
+function (a::AlphaDropout)(x)
+  istraining() || return x
   λ = eltype(x)(1.0507009873554804934193349852946)
   α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
-  x .= _alphadropout_kernel.(x, noise, p, α1)
-  A = (p + p * (1 - p) * α1 ^ 2) ^ 0.5
-  B = -A * α1 * (1 - p)
+  x = @. x*(noise > (1 - a.p)) + α1 * (noise < (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α1 ^ 2)^0.5
+  B = -A * α1 * (1 - a.p)
   x = @. A * x + B
-  return x, Δ -> (Δ .* A .* noise, nothing)
+  return x
 end
-
-(a::AlphaDropout)(x) = alphadropout(x, a.p)
 
 """
     LayerNorm(h::Integer)
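For context: the old code routed AlphaDropout through a hand-written Zygote @adjoint with an explicit pullback, which is what the commit title says was broken; the new callable expresses the mask-and-rescale step as ordinary broadcasts, so Zygote derives the gradient itself. The A and B constants rescale the masked output, following the self-normalizing networks formulation of Klambauer et al. Below is a minimal sketch of checking that gradients now flow, assuming this Zygote-based Flux branch, where istraining() evaluates to true inside gradient (if gradient is not exported by Flux here, call Zygote.gradient instead):

using Flux

m = AlphaDropout(0.5f0)   # drop parameter p = 0.5
x = randn(Float32, 8)

# Outside differentiation istraining() is false, so the layer is a no-op.
@assert m(x) == x

# Inside gradient, istraining() is true: the stochastic mask-and-rescale
# path runs and Zygote differentiates the broadcasts directly.
# (A fresh random mask is drawn per call, so g is itself stochastic.)
g = gradient(x -> sum(m(x)), x)[1]
@assert size(g) == size(x)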