using Juno
import Zygote: Params, gradient
"""
    update!(x, x̄)

Update the array `x` according to `x .-= x̄`.
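
# Examples

A minimal sketch (array values chosen purely for illustration):

```julia
x = [1.0, 2.0]
x̄ = [0.1, 0.2]
update!(x, x̄)  # x is now [0.9, 1.8]
```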
"""
function update!(x::AbstractArray, x̄)
  x .-= x̄
end
"""
    update!(opt, p, g)
    update!(opt, ps::Params, gs)

Perform an update step of the parameters `ps` (or the single parameter `p`)
according to the optimizer `opt` and the gradients `gs` (or the gradient `g`).
As a result, the parameters are mutated and the optimizer's internal state may change.
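
# Examples

A minimal sketch using the [`Descent`](@ref) optimiser, which scales the gradient
by its learning rate (values chosen purely for illustration):

```julia
opt = Descent(0.1)
x = [1.0, 2.0]
x̄ = [1.0, 1.0]
update!(opt, x, x̄)  # x .-= 0.1 .* x̄, so x is now [0.9, 1.9]
```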
"""
function update!(opt, x, x̄)
  x .-= apply!(opt, x, x̄)
end
function update!(opt, xs::Params, gs)
  for x in xs
    gs[x] === nothing && continue  # skip parameters that received no gradient
    update!(opt, x, gs[x])
  end
end
# Callback niceties
call(f, xs...) = f(xs...)
runall(f) = f
runall(fs::AbstractVector) = () -> foreach(call, fs)
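
# `runall` normalises the `cb` argument of `train!`: a single callback is used as-is,
# while a vector of callbacks is wrapped so that calling the result runs each one in
# turn; e.g. `runall([cb1, cb2])()` calls `cb1()` and then `cb2()`.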
struct StopException <: Exception end
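# Thrown by `stop()` and caught inside `train!` to break out of the training loop early.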
"""
    stop()

Call `Flux.stop()` in a callback to signal that a stopping condition has been met.
This will cause the training loop to exit early.
# Examples
```julia
cb = function ()
  accuracy() > 0.9 && Flux.stop()
end
```
"""
function stop()
  throw(StopException())
end
"""
    train!(loss, params, data, opt; cb)

For each datapoint `d` in `data`, compute the gradient of `loss(d...)` through
backpropagation and call the optimizer `opt`.
If a datapoint `d` is a numeric array, assume no splatting is needed
and compute the gradient of `loss(d)`.
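
For example (a sketch; `model`, `xs`, `ys`, and `opt` are assumed to be defined):

    data = zip(xs, ys)                     # yields tuples, so the loss is called as loss(x, y)
    loss(x, y) = Flux.mse(model(x), y)
    train!(loss, params(model), data, opt)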
A callback can be passed via the keyword argument `cb`. For example, this will print
"training" every 10 seconds (using [`Flux.throttle`](@ref)):

    train!(loss, params, data, opt,
           cb = throttle(() -> println("training"), 10))
The callback can call [`Flux.stop`](@ref) to interrupt the training loop.
Multiple optimisers and callbacks can be passed to `opt` and `cb` as arrays.
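
For instance, to run two callbacks (hypothetical helpers `evalcb` and `logcb`):

    train!(loss, params, data, opt, cb = [evalcb, logcb])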
"""
function train!(loss, ps, data, opt; cb = () -> ())
  ps = Params(ps)
  cb = runall(cb)
  @progress for d in data
    try
      # Numeric arrays are passed to the loss whole; any other datapoint is splatted.
      if d isa AbstractArray{<:Number}
        gs = gradient(ps) do
          loss(d)
        end
      else
        gs = gradient(ps) do
          loss(d...)
        end
      end
      update!(opt, ps, gs)
      cb()
    catch ex
      # `Flux.stop()` throws a StopException; treat it as a clean exit from training.
      if ex isa StopException
        break
      else
        rethrow(ex)
      end
    end
  end
end
"""
    @epochs N body

Run `body` `N` times. Mainly useful for quickly doing multiple epochs of
training in a REPL.
# Examples
```jldoctest
julia> Flux.@epochs 2 println("hello")
[ Info: Epoch 1
hello
[ Info: Epoch 2
hello
```
"""
macro epochs(n, ex)
  :(@progress for i = 1:$(esc(n))
      @info "Epoch $i"
      $(esc(ex))
    end)
end