Add RADAM optimizer

This commit is contained in:
Fredrik Bagge Carlson 2019-08-19 12:22:32 +08:00 committed by GitHub
parent aab3c4e052
commit ebbad0d135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -108,6 +108,36 @@ function apply!(o::ADAM, x, Δ)
return Δ
end
"""
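    RADAM(η = 0.001, β = (0.9, 0.999))

[RADAM](https://arxiv.org/pdf/1908.03265v1.pdf) optimiser (Rectified ADAM).
`η` is the learning rate; `β` holds the decay rates of the first- and
second-moment estimates of the gradient.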
"""
mutable struct RADAM
    eta::Float64                   # step size applied to every update
    beta::Tuple{Float64,Float64}   # decay rates for the first and second moment
    state::IdDict                  # per-parameter optimiser state, keyed by identity
end

# Convenience constructor: every optimiser instance starts with its own empty state.
function RADAM(η = 0.001, β = (0.9, 0.999))
    return RADAM(η, β, IdDict())
end
"""
    apply!(o::RADAM, x, Δ)

Turn the gradient `Δ` of parameter `x` into a Rectified ADAM update step.

`Δ` is overwritten in place with the step and returned. The moment estimates,
the running powers of `β` and the step counter belonging to `x` are stored in
`o.state` under the key `x` and refreshed on every call.
"""
function apply!(o::RADAM, x, Δ)
    η, β = o.eta, o.beta
    # ρ∞ and ρ follow the notation of the RAdam paper linked in the RADAM docstring.
    ρ∞ = 2 / (1 - β[2]) - 1

    # The do-block form only builds the zero buffers the first time `x` is seen,
    # instead of allocating (and discarding) them on every step.
    mt, vt, βp, t = get!(o.state, x) do
        (zero(x), zero(x), β, 1)
    end

    # Exponential moving averages of the gradient and of the squared gradient,
    # updated in place so the arrays held in `o.state` stay current.
    @. mt = β[1] * mt + (1 - β[1]) * Δ
    @. vt = β[2] * vt + (1 - β[2]) * Δ^2

    # βp starts at β with t = 1 and is multiplied by β each step, so βp == β .^ t.
    ρ = ρ∞ - 2t * βp[2] / (1 - βp[2])

    if ρ > 4
        # Rectified adaptive step. The bias-corrected second moment is an average of
        # squared gradients, so its square root is what scales the step.
        r = sqrt((ρ - 4) * (ρ - 2) * ρ∞ / ((ρ∞ - 4) * (ρ∞ - 2) * ρ))
        @. Δ = mt / (1 - βp[1]) / (sqrt(vt / (1 - βp[2])) + ϵ) * η * r
    else
        # Early steps: fall back to a bias-corrected momentum step without
        # the adaptive denominator.
        @. Δ = mt / (1 - βp[1]) * η
    end

    o.state[x] = (mt, vt, βp .* β, t + 1)
    return Δ
end
"""
AdaMax(params, η = 0.001; β1 = 0.9, β2 = 0.999, ϵ = 1e-08)