Add RADAM optimizer
This commit is contained in:
parent
aab3c4e052
commit
ebbad0d135
@ -108,6 +108,36 @@ function apply!(o::ADAM, x, Δ)
|
|||||||
return Δ
|
return Δ
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
    RADAM(η = 0.001, β = (0.9, 0.999))

[RADAM](https://arxiv.org/pdf/1908.03265v1.pdf) optimiser (Rectified ADAM).

# Arguments
- `η`: step size; the final update is scaled by this value. Stored as `eta`.
- `β`: pair of exponential decay rates, `β[1]` for the running mean of the
  gradient and `β[2]` for the running mean of the squared gradient.
  Stored as `beta`.

Per-parameter optimiser state is kept in the `state` field, an `IdDict`
that starts out empty for every new optimiser.

# Examples
```julia
opt = RADAM()                    # defaults: η = 0.001, β = (0.9, 0.999)
opt = RADAM(0.01, (0.9, 0.8))
```
"""
mutable struct RADAM
  eta::Float64
  beta::Tuple{Float64,Float64}
  state::IdDict
end

# Convenience constructor: supplies the default hyper-parameters and gives
# every optimiser its own fresh, empty state dictionary.
RADAM(η = 0.001, β = (0.9, 0.999)) = RADAM(η, β, IdDict())
|
||||||
|
|
||||||
|
# Rectified-ADAM update rule.
#
# Overwrites the gradient `Δ` of parameter `x` in place with the step to take
# and returns it. The running moment estimates, the accumulated powers of `β`
# and the step counter are stored per parameter in `o.state`, keyed by `x`.
#
# NOTE(review): `ϵ` is not defined in this block; it is presumably a
# module-level constant shared by the optimisers in this file — confirm.
function apply!(o::RADAM, x, Δ)
  η, β = o.eta, o.beta

  # ρ∞ from the RAdam paper; it depends only on β[2].
  ρ∞ = 2 / (1 - β[2]) - 1

  # Build the initial state lazily: the do-block only runs the first time `x`
  # is seen, so later steps do not allocate two throw-away zero arrays.
  mt, vt, βp, t = get!(o.state, x) do
    (zero(x), zero(x), β, 1)
  end

  # Exponential moving averages of the gradient and of its square, updated
  # in place so the arrays held in `o.state` stay current.
  @. mt = β[1] * mt + (1 - β[1]) * Δ
  @. vt = β[2] * vt + (1 - β[2]) * Δ^2

  # βp holds β .^ t, so this is ρ_t = ρ∞ - 2t β₂ᵗ / (1 - β₂ᵗ).
  ρ = ρ∞ - 2t * βp[2] / (1 - βp[2])

  if ρ > 4
    # Rectification factor applied on top of the bias-corrected adaptive step.
    r = sqrt((ρ - 4) * (ρ - 2) * ρ∞ / ((ρ∞ - 4) * (ρ∞ - 2) * ρ))
    @. Δ = mt / (1 - βp[1]) / (√(vt / (1 - βp[2])) + ϵ) * η * r
  else
    # Otherwise skip the adaptive denominator and take a bias-corrected
    # momentum step only.
    @. Δ = mt / (1 - βp[1]) * η
  end

  # Advance the β powers and the step counter for the next call.
  o.state[x] = (mt, vt, βp .* β, t + 1)
  return Δ
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
AdaMax(params, η = 0.001; β1 = 0.9, β2 = 0.999, ϵ = 1e-08)
|
AdaMax(params, η = 0.001; β1 = 0.9, β2 = 0.999, ϵ = 1e-08)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user