Update AlphaDropout
commit 2f1a187665 (parent ee74f1a311)
@@ -43,6 +43,12 @@ function Base.show(io::IO, d::Dropout)
   print(io, ")")
 end
 
+"""
+    AlphaDropout(p)
+A dropout layer. It is used in Self-Normalizing Neural Networks.
+(https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf)
+The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
+"""
 """
     AlphaDropout(p)
 A dropout layer. It is used in Self-Normalizing Neural Networks.
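For context, a minimal usage sketch of the layer the new docstring describes, assuming Flux's exported Chain, Dense, selu and AlphaDropout; the layer sizes, batch size and dropout probability below are made up:

using Flux

# AlphaDropout is intended for self-normalizing networks, i.e. paired with selu
# activations so that activations stay roughly zero-mean / unit-variance.
model = Chain(
  Dense(784, 256, selu),
  AlphaDropout(0.5),
  Dense(256, 10))

x = randn(Float32, 784, 16)  # dummy batch of 16 inputs
y = model(x)                 # plain forward call; with this diff, the dropout noise
                             # is only injected via the @adjoint during differentiation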
@@ -57,19 +63,24 @@ mutable struct AlphaDropout{F}
   end
 end
 
-function (a::AlphaDropout)(x)
-  istraining() || return x
+alphadropout(x, p) = x
+
+_alphadropout_kernel(x, noise, p, α1) = noise > (1 - p) ? x : α1
+
+@adjoint function alphadropout(x, p)
   λ = eltype(x)(1.0507009873554804934193349852946)
   α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
-  x = @. x*(noise > (1 - a.p)) + α1 * (noise <= (1 - a.p))
-  A = (a.p + a.p * (1 - a.p) * α1 ^ 2)^0.5
-  B = -A * α1 * (1 - a.p)
+  x .= _alphadropout_kernel.(x, noise, p, α1)
+  A = (p + p * (1 - p) * α1 ^ 2) ^ 0.5
+  B = -A * α1 * (1 - p)
   x = @. A * x + B
-  return x
+  return x, Δ -> (Δ .* A.* noise, nothing)
 end
 
+(a::AlphaDropout)(x) = alphadropout(x, a.p)
+
 """
     LayerNorm(h::Integer)
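The docstring claims that the mean and variance of the activations are preserved. Below is a standalone sketch of the alpha-dropout transform as given in the linked SNN paper, checking that claim empirically. It is not a line-for-line copy of the diff above: `q` here is the keep probability, the scaling uses the paper's exponent, and the sample size is made up.

using Statistics

# Alpha-dropout transform from the SNN paper: dropped units are set to selu's
# negative saturation value, then an affine correction a*y + b restores the
# original mean and variance.
λ  = 1.0507009873554804934193349852946
α  = 1.6732632423543772848170429916717
α′ = -λ * α                      # value a dropped unit is set to
q  = 0.9                         # keep probability

x = randn(100_000)               # activations assumed ≈ zero mean, unit variance
d = rand(length(x)) .< q         # Bernoulli keep mask

a = (q + α′^2 * q * (1 - q))^(-0.5)
b = -a * α′ * (1 - q)

y = a .* (x .* d .+ α′ .* (1 .- d)) .+ b

println(mean(y), "  ", var(y))   # ≈ 0.0 and ≈ 1.0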
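The diff relies on Zygote's @adjoint to supply both the forward value and the pullback when alphadropout is differentiated, while the plain definition `alphadropout(x, p) = x` stays an identity. A minimal sketch of that pattern, with a made-up function name:

using Zygote
using Zygote: @adjoint

scaleby2(x) = x                    # plain call: identity, like alphadropout(x, p) = x

@adjoint function scaleby2(x)
  y = 2 .* x                       # value used when the call is being differentiated
  return y, Δ -> (2 .* Δ,)         # pullback: one gradient entry per argument
end

scaleby2([1.0, 2.0])                               # → [1.0, 2.0]
gradient(x -> sum(scaleby2(x)), [1.0, 2.0, 3.0])   # → ([2.0, 2.0, 2.0],)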