2018-10-01 00:00:53 +00:00
|
|
|
|
using Base: depwarn
|
2018-11-01 09:32:00 +00:00
|
|
|
|
using Flux: Params
|
2018-10-01 00:00:53 +00:00
|
|
|
|
|
2018-10-31 14:58:55 +00:00
|
|
|
|
# Return `opt` untouched when `decay` is zero; otherwise return
# `Optimiser(opt, InvDecay(decay))`.
function check_decay(opt, decay)
    if decay == 0
        return opt
    end
    return Optimiser(opt, InvDecay(decay))
end
|
2018-10-01 00:00:53 +00:00
|
|
|
|
|
|
|
|
|
# legacy update rule
|
2018-11-01 09:32:00 +00:00
|
|
|
|
# Build a zero-argument closure; calling it runs `update!(opt, ps)`.
function updaterule(opt, ps)
    return () -> update!(opt, ps)
end
|
2018-10-01 00:00:53 +00:00
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `SGD(params, η; decay)` entry point.
# Emits a deprecation warning, builds `Descent(η)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function SGD(params::Union{AbstractArray, Params}, η = 0.1; decay = 0.)
    depwarn("SGD(params) is deprecated; use Descent(η::Float64) instead", :SGD)
    rule = check_decay(Descent(η), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `Momentum(params, η; ρ, decay)` entry point.
# Emits a deprecation warning, builds `Momentum(η, ρ)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function Momentum(params::Union{AbstractArray, Params}, η = 0.01;
                  ρ = 0.9, decay = 0.)
    depwarn("Momentum(params) is deprecated; use Momentum(η::Float64) instead", :Momentum)
    rule = check_decay(Momentum(η, ρ), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `Nesterov(params, η; ρ, decay)` entry point.
# Emits a deprecation warning, builds `Nesterov(η, ρ)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function Nesterov(params::Union{AbstractArray, Params}, η = 0.001;
                  ρ = 0.9, decay = 0.)
    depwarn("Nesterov(params) is deprecated; use Nesterov(η::Float64) instead", :Nesterov)
    rule = check_decay(Nesterov(η, ρ), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `RMSProp(params, η; ρ, decay)` entry point.
# Emits a deprecation warning, builds `RMSProp(η, ρ)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function RMSProp(params::Union{AbstractArray, Params}, η = 0.001;
                 ρ = 0.9, decay = 0.)
    depwarn("RMSProp(params) is deprecated; use RMSProp(η::Float64) instead", :RMSProp)
    rule = check_decay(RMSProp(η, ρ), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `ADAM(params, η; β1, β2, decay)` entry point.
# Emits a deprecation warning, packs `β1`/`β2` into a tuple, builds
# `ADAM(η, (β1, β2))`, passes it through `check_decay`, and returns the closure
# produced by `updaterule` for `params`.
function ADAM(params::Union{AbstractArray, Params}, η = 0.001;
              β1 = 0.9, β2 = 0.999, decay = 0.)
    depwarn("ADAM(params) is deprecated; use ADAM(η::Float64) instead", :ADAM)
    rule = check_decay(ADAM(η, (β1, β2)), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `ADAGrad(params, η; decay)` entry point (`η` must be a `Float64`).
# Emits a deprecation warning, builds `ADAGrad(η)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function ADAGrad(params::Union{AbstractArray, Params}, η::Float64 = 0.1; decay = 0.)
    depwarn("ADAGrad(params) is deprecated; use ADAGrad(η::Float64) instead", :ADAGrad)
    rule = check_decay(ADAGrad(η), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `ADADelta(params, ρ; decay)` entry point (`ρ` must be a `Float64`).
# Emits a deprecation warning, builds `ADADelta(ρ)`, passes it through
# `check_decay`, and returns the closure produced by `updaterule` for `params`.
function ADADelta(params::Union{AbstractArray, Params}, ρ::Float64 = 0.9; decay = 0.)
    # The replacement constructor is called with `ρ` (see below), so the warning
    # names `ρ` rather than a learning rate `η` that this method never takes.
    depwarn("ADADelta(params) is deprecated; use ADADelta(ρ::Float64) instead", :ADADelta)
    rule = check_decay(ADADelta(ρ), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `AdaMax(params, η; β1, β2, decay)` entry point.
# Emits a deprecation warning, packs `β1`/`β2` into a tuple, builds
# `AdaMax(η, (β1, β2))`, passes it through `check_decay`, and returns the
# closure produced by `updaterule` for `params`.
function AdaMax(params::Union{AbstractArray, Params}, η = 0.001;
                β1 = 0.9, β2 = 0.999, decay = 0.)
    depwarn("AdaMax(params) is deprecated; use AdaMax(η::Float64) instead", :AdaMax)
    rule = check_decay(AdaMax(η, (β1, β2)), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `AMSGrad(params, η; β1, β2, decay)` entry point.
# Emits a deprecation warning, packs `β1`/`β2` into a tuple, builds
# `AMSGrad(η, (β1, β2))`, passes it through `check_decay`, and returns the
# closure produced by `updaterule` for `params`.
function AMSGrad(params::Union{AbstractArray, Params}, η = 0.001;
                 β1 = 0.9, β2 = 0.999, decay = 0.)
    depwarn("AMSGrad(params) is deprecated; use AMSGrad(η::Float64) instead", :AMSGrad)
    rule = check_decay(AMSGrad(η, (β1, β2)), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `NADAM(params, η; β1, β2, decay)` entry point.
# Emits a deprecation warning, packs `β1`/`β2` into a tuple, builds
# `NADAM(η, (β1, β2))`, passes it through `check_decay`, and returns the
# closure produced by `updaterule` for `params`.
function NADAM(params::Union{AbstractArray, Params}, η = 0.001;
               β1 = 0.9, β2 = 0.999, decay = 0.)
    depwarn("NADAM(params) is deprecated; use NADAM(η::Float64) instead", :NADAM)
    rule = check_decay(NADAM(η, (β1, β2)), decay)
    return updaterule(rule, params)
end
|
|
|
|
|
|
2018-11-01 16:47:54 +00:00
|
|
|
|
# Deprecated `ADAMW(params, η; β1, β2, decay)` entry point.
# Emits a deprecation warning, builds `ADAMW(η, (β1, β2))`, passes it through
# `check_decay`, additionally wraps it with `WeightDecay(decay)` when `decay`
# is non-zero, and returns the closure produced by `updaterule` for `params`.
function ADAMW(params::Union{AbstractArray, Params}, η = 0.001;
               β1 = 0.9, β2 = 0.999, decay = 0.)
    depwarn("ADAMW(params) is deprecated; use ADAMW(η::Float64) instead", :ADAMW)
    rule = check_decay(ADAMW(η, (β1, β2)), decay)
    # NOTE(review): a non-zero `decay` is used twice here, once inside
    # `check_decay` and once more for `WeightDecay` below. Confirm that both
    # wrappers are intended.
    if decay != 0
        rule = Optimiser(rule, WeightDecay(decay))
    end
    return updaterule(rule, params)
end
|
2018-10-11 04:37:16 +00:00
|
|
|
|
|
2018-10-31 14:58:55 +00:00
|
|
|
|
# Old training loop
|
|
|
|
|
|
|
|
|
|
# Wrapper around a legacy optimiser callable.
# `func` is stored as given and is invoked with no arguments by the
# `update!(::OldOptimiser, ps)` method. The struct is parametrised on the
# callable's type so the field is concretely typed instead of `Any`.
struct OldOptimiser{F}
    func::F
end
|
|
|
|
|
|
|
|
|
|
# Apply a legacy optimiser: `ps` is ignored and the wrapped zero-argument
# callable is invoked; its result is returned.
function update!(opt::OldOptimiser, ps)
    return opt.func()
end
|
|
|
|
|
|
2018-10-11 04:37:16 +00:00
|
|
|
|
# Train function
|
2018-10-31 14:58:55 +00:00
|
|
|
|
# Deprecated three-argument `train!(loss, data, opt; cb)`.
# Emits a deprecation warning, wraps `opt` in `OldOptimiser`, and forwards to
# the four-argument `train!` with an empty tuple in the parameters position.
function train!(loss, data, opt; cb = () -> ())
    depwarn(
        "train!(loss, data, opt) is deprecated; use train!(loss, params, data, opt) instead",
        :train!,
    )
    legacy = OldOptimiser(opt)
    return train!(loss, (), data, legacy; cb = cb)
end
|