From dc6cd29bc9b42e38231f152871caffbdf06c950e Mon Sep 17 00:00:00 2001
From: zeptodoctor <44736852+zeptodoctor@users.noreply.github.com>
Date: Tue, 12 May 2020 15:14:25 +0000
Subject: [PATCH] build based on a84e08c

---
 dev/community/index.html             |  2 +-
 dev/data/dataloader/index.html       |  2 +-
 dev/data/onehot/index.html           |  6 ++---
 dev/datasets/index.html              | 12 +++++-----
 dev/ecosystem/index.html             |  2 +-
 dev/gpu/index.html                   |  2 +-
 dev/index.html                       |  2 +-
 dev/models/advanced/index.html       |  2 +-
 dev/models/basics/index.html         |  6 ++---
 dev/models/layers/index.html         | 34 ++++++++++++++--------------
 dev/models/nnlib/index.html          |  2 +-
 dev/models/recurrence/index.html     |  2 +-
 dev/models/regularisation/index.html |  2 +-
 dev/performance/index.html           |  2 +-
 dev/saving/index.html                |  2 +-
 dev/search/index.html                |  2 +-
 dev/training/optimisers/index.html   | 30 ++++++++++++------------
 dev/training/training/index.html     |  6 ++---
 dev/utilities/index.html             | 24 ++++++++++----------
 19 files changed, 71 insertions(+), 71 deletions(-)
diff --git a/dev/community/index.html b/dev/community/index.html
index 1632b5d2..37583e39 100644
--- a/dev/community/index.html
+++ b/dev/community/index.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview', {'page': location.pathname + location.search + location.hash});
-</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li class="is-active"><a class="tocitem" href>Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Community</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Community</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/community.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" 
id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Community-1"><a class="docs-heading-anchor" href="#Community-1">Community</a><a class="docs-heading-anchor-permalink" href="#Community-1" title="Permalink"></a></h1><p>All Flux users are welcome to join our community on the <a href="https://discourse.julialang.org/">Julia forum</a>, or the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning). If you have questions or issues we&#39;ll try to help you out.</p><p>If you&#39;re interested in hacking on Flux, the <a href="https://github.com/FluxML/Flux.jl">source code</a> is open and easy to understand – it&#39;s all just the same Julia code you work with normally. You might be interested in our <a href="https://github.com/FluxML/Flux.jl/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">intro issues</a> to get started.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../datasets/">« Datasets</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li class="is-active"><a class="tocitem" href>Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Community</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Community</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/community.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" 
id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Community-1"><a class="docs-heading-anchor" href="#Community-1">Community</a><a class="docs-heading-anchor-permalink" href="#Community-1" title="Permalink"></a></h1><p>All Flux users are welcome to join our community on the <a href="https://discourse.julialang.org/">Julia forum</a>, or the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning). If you have questions or issues we&#39;ll try to help you out.</p><p>If you&#39;re interested in hacking on Flux, the <a href="https://github.com/FluxML/Flux.jl">source code</a> is open and easy to understand – it&#39;s all just the same Julia code you work with normally. You might be interested in our <a href="https://github.com/FluxML/Flux.jl/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">intro issues</a> to get started.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../datasets/">« Datasets</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/data/dataloader/index.html b/dev/data/dataloader/index.html
index 2b63c850..93fe4def 100644
--- a/dev/data/dataloader/index.html
+++ b/dev/data/dataloader/index.html
@@ -29,4 +29,4 @@ end
 
 # train for 10 epochs
 using IterTools: ncycle 
-Flux.train!(loss, ps, ncycle(train_loader, 10), opt)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/dataloader.jl#L13-L54">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../onehot/">« One-Hot Encoding</a><a class="docs-footer-nextpage" href="../../training/optimisers/">Optimisers »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+Flux.train!(loss, ps, ncycle(train_loader, 10), opt)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/dataloader.jl#L13-L54">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../onehot/">« One-Hot Encoding</a><a class="docs-footer-nextpage" href="../../training/optimisers/">Optimisers »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/data/onehot/index.html b/dev/data/onehot/index.html
index cececb72..469c5a85 100644
--- a/dev/data/onehot/index.html
+++ b/dev/data/onehot/index.html
@@ -35,11 +35,11 @@ julia&gt; Flux.onehot(:c, [:a, :b, :c])
 3-element Flux.OneHotVector:
  0
  0
- 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/onehot.jl#L45-L67">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.onecold" href="#Flux.onecold"><code>Flux.onecold</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">onecold(y[, labels = 1:length(y)])</code></pre><p>Inverse operations of <a href="#Flux.onehot"><code>onehot</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.onecold([true, false, false], [:a, :b, :c])
+ 1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/onehot.jl#L45-L67">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.onecold" href="#Flux.onecold"><code>Flux.onecold</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">onecold(y[, labels = 1:length(y)])</code></pre><p>Inverse operations of <a href="#Flux.onehot"><code>onehot</code></a>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.onecold([true, false, false], [:a, :b, :c])
 :a
 
 julia&gt; Flux.onecold([0.3, 0.2, 0.5], [:a, :b, :c])
-:c</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/onehot.jl#L102-L115">source</a></section></article><h2 id="Batches-1"><a class="docs-heading-anchor" href="#Batches-1">Batches</a><a class="docs-heading-anchor-permalink" href="#Batches-1" title="Permalink"></a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>onecold</code> treats matrices as batches.</p><pre><code class="language-julia">julia&gt; using Flux: onehotbatch
+:c</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/onehot.jl#L102-L115">source</a></section></article><h2 id="Batches-1"><a class="docs-heading-anchor" href="#Batches-1">Batches</a><a class="docs-heading-anchor-permalink" href="#Batches-1" title="Permalink"></a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>onecold</code> treats matrices as batches.</p><pre><code class="language-julia">julia&gt; using Flux: onehotbatch
 
 julia&gt; onehotbatch([:b, :a, :b], [:a, :b, :c])
 3×3 Flux.OneHotMatrix:
@@ -55,4 +55,4 @@ julia&gt; onecold(ans, [:a, :b, :c])
 3×3 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
  0  1  0
  1  0  1
- 0  0  0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/onehot.jl#L80-L96">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../models/nnlib/">« NNlib</a><a class="docs-footer-nextpage" href="../dataloader/">DataLoader »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ 0  0  0</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/onehot.jl#L80-L96">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../models/nnlib/">« NNlib</a><a class="docs-footer-nextpage" href="../dataloader/">DataLoader »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/datasets/index.html b/dev/datasets/index.html
index 29f2264c..2e2f24d8 100644
--- a/dev/datasets/index.html
+++ b/dev/datasets/index.html
@@ -16,14 +16,14 @@ julia&gt; features[:, 1]
  5.1
  3.5
  1.4
- 0.2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/iris.jl#L51-L71">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Iris.labels-Tuple{}" href="#Flux.Data.Iris.labels-Tuple{}"><code>Flux.Data.Iris.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()</code></pre><p>Get the labels of the iris dataset, a 150 element array of strings listing the species of each example.</p><pre><code class="language-julia-repl">julia&gt; labels = Flux.Data.Iris.labels();
+ 0.2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/iris.jl#L51-L71">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Iris.labels-Tuple{}" href="#Flux.Data.Iris.labels-Tuple{}"><code>Flux.Data.Iris.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()</code></pre><p>Get the labels of the iris dataset, a 150 element array of strings listing the species of each example.</p><pre><code class="language-julia-repl">julia&gt; labels = Flux.Data.Iris.labels();
 
 julia&gt; summary(labels)
 &quot;150-element Array{String,1}&quot;
 
 julia&gt; labels[1]
-&quot;Iris-setosa&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/iris.jl#L29-L44">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.MNIST.images-Tuple{}" href="#Flux.Data.MNIST.images-Tuple{}"><code>Flux.Data.MNIST.images</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">images()
-images(:test)</code></pre><p>Load the MNIST images.</p><p>Each image is a 28×28 array of <code>Gray</code> colour values (see <a href="https://github.com/JuliaGraphics/Colors.jl">Colors.jl</a>).</p><p>Return the 60,000 training images by default; pass <code>:test</code> to retrieve the 10,000 test images.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/mnist.jl#L80-L91">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.MNIST.labels-Tuple{}" href="#Flux.Data.MNIST.labels-Tuple{}"><code>Flux.Data.MNIST.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()
-labels(:test)</code></pre><p>Load the labels corresponding to each of the images returned from <a href="#Flux.Data.MNIST.images-Tuple{}"><code>images()</code></a>. Each label is a number from 0-9.</p><p>Return the 60,000 training labels by default; pass <code>:test</code> to retrieve the 10,000 test labels.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/mnist.jl#L99-L108">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.FashionMNIST.images-Tuple{}" href="#Flux.Data.FashionMNIST.images-Tuple{}"><code>Flux.Data.FashionMNIST.images</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">images()
-images(:test)</code></pre><p>Load the Fashion-MNIST images.</p><p>Each image is a 28×28 array of <code>Gray</code> colour values (see <a href="https://github.com/JuliaGraphics/Colors.jl">Colors.jl</a>).</p><p>Return the 60,000 training images by default; pass <code>:test</code> to retrieve the 10,000 test images.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/fashion-mnist.jl#L30-L41">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.FashionMNIST.labels-Tuple{}" href="#Flux.Data.FashionMNIST.labels-Tuple{}"><code>Flux.Data.FashionMNIST.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()
-labels(:test)</code></pre><p>Load the labels corresponding to each of the images returned from <a href="#Flux.Data.FashionMNIST.images-Tuple{}"><code>images()</code></a>. Each label is a number from 0-9.</p><p>Return the 60,000 training labels by default; pass <code>:test</code> to retrieve the 10,000 test labels.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/fashion-mnist.jl#L49-L58">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.phones-Tuple{}" href="#Flux.Data.CMUDict.phones-Tuple{}"><code>Flux.Data.CMUDict.phones</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">phones()</code></pre><p>Return a <code>Vector</code> containing the phones used in the CMU Pronouncing Dictionary.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/cmudict.jl#L27-L31">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.symbols-Tuple{}" href="#Flux.Data.CMUDict.symbols-Tuple{}"><code>Flux.Data.CMUDict.symbols</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">symbols()</code></pre><p>Return a <code>Vector</code> containing the symbols used in the CMU Pronouncing Dictionary. 
A symbol is a phone with optional auxiliary symbols, indicating for example the amount of stress on the phone.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/cmudict.jl#L38-L44">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.rawdict-Tuple{}" href="#Flux.Data.CMUDict.rawdict-Tuple{}"><code>Flux.Data.CMUDict.rawdict</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">rawdict()</code></pre><p>Return the unfiltered CMU Pronouncing Dictionary.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/cmudict.jl#L51-L55">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.cmudict-Tuple{}" href="#Flux.Data.CMUDict.cmudict-Tuple{}"><code>Flux.Data.CMUDict.cmudict</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">cmudict()</code></pre><p>Return a filtered CMU Pronouncing Dictionary.</p><p>It is filtered so each word contains only ASCII characters and a combination of word characters (as determined by the regex engine using <code>\w</code>), &#39;-&#39; and &#39;.&#39;.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/cmudict.jl#L64-L71">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.train-Tuple{}" href="#Flux.Data.Sentiment.train-Tuple{}"><code>Flux.Data.Sentiment.train</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">train()</code></pre><p>Return the train split of the Stanford Sentiment Treebank. 
The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/sentiment.jl#L43-L48">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.test-Tuple{}" href="#Flux.Data.Sentiment.test-Tuple{}"><code>Flux.Data.Sentiment.test</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">test()</code></pre><p>Return the test split of the Stanford Sentiment Treebank. The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/sentiment.jl#L51-L56">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.dev-Tuple{}" href="#Flux.Data.Sentiment.dev-Tuple{}"><code>Flux.Data.Sentiment.dev</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">dev()</code></pre><p>Return the dev split of the Stanford Sentiment Treebank. 
The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/data/sentiment.jl#L59-L64">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../performance/">« Performance Tips</a><a class="docs-footer-nextpage" href="../community/">Community »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+&quot;Iris-setosa&quot;</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/iris.jl#L29-L44">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.MNIST.images-Tuple{}" href="#Flux.Data.MNIST.images-Tuple{}"><code>Flux.Data.MNIST.images</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">images()
+images(:test)</code></pre><p>Load the MNIST images.</p><p>Each image is a 28×28 array of <code>Gray</code> colour values (see <a href="https://github.com/JuliaGraphics/Colors.jl">Colors.jl</a>).</p><p>Return the 60,000 training images by default; pass <code>:test</code> to retrieve the 10,000 test images.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/mnist.jl#L80-L91">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.MNIST.labels-Tuple{}" href="#Flux.Data.MNIST.labels-Tuple{}"><code>Flux.Data.MNIST.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()
+labels(:test)</code></pre><p>Load the labels corresponding to each of the images returned from <a href="#Flux.Data.MNIST.images-Tuple{}"><code>images()</code></a>. Each label is a number from 0-9.</p><p>Return the 60,000 training labels by default; pass <code>:test</code> to retrieve the 10,000 test labels.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/mnist.jl#L99-L108">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.FashionMNIST.images-Tuple{}" href="#Flux.Data.FashionMNIST.images-Tuple{}"><code>Flux.Data.FashionMNIST.images</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">images()
+images(:test)</code></pre><p>Load the Fashion-MNIST images.</p><p>Each image is a 28×28 array of <code>Gray</code> colour values (see <a href="https://github.com/JuliaGraphics/Colors.jl">Colors.jl</a>).</p><p>Return the 60,000 training images by default; pass <code>:test</code> to retrieve the 10,000 test images.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/fashion-mnist.jl#L30-L41">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.FashionMNIST.labels-Tuple{}" href="#Flux.Data.FashionMNIST.labels-Tuple{}"><code>Flux.Data.FashionMNIST.labels</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">labels()
+labels(:test)</code></pre><p>Load the labels corresponding to each of the images returned from <a href="#Flux.Data.FashionMNIST.images-Tuple{}"><code>images()</code></a>. Each label is a number from 0-9.</p><p>Return the 60,000 training labels by default; pass <code>:test</code> to retrieve the 10,000 test labels.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/fashion-mnist.jl#L49-L58">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.phones-Tuple{}" href="#Flux.Data.CMUDict.phones-Tuple{}"><code>Flux.Data.CMUDict.phones</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">phones()</code></pre><p>Return a <code>Vector</code> containing the phones used in the CMU Pronouncing Dictionary.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/cmudict.jl#L27-L31">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.symbols-Tuple{}" href="#Flux.Data.CMUDict.symbols-Tuple{}"><code>Flux.Data.CMUDict.symbols</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">symbols()</code></pre><p>Return a <code>Vector</code> containing the symbols used in the CMU Pronouncing Dictionary. 
A symbol is a phone with optional auxiliary symbols, indicating for example the amount of stress on the phone.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/cmudict.jl#L38-L44">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.rawdict-Tuple{}" href="#Flux.Data.CMUDict.rawdict-Tuple{}"><code>Flux.Data.CMUDict.rawdict</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">rawdict()</code></pre><p>Return the unfiltered CMU Pronouncing Dictionary.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/cmudict.jl#L51-L55">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.CMUDict.cmudict-Tuple{}" href="#Flux.Data.CMUDict.cmudict-Tuple{}"><code>Flux.Data.CMUDict.cmudict</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">cmudict()</code></pre><p>Return a filtered CMU Pronouncing Dictionary.</p><p>It is filtered so each word contains only ASCII characters and a combination of word characters (as determined by the regex engine using <code>\w</code>), &#39;-&#39; and &#39;.&#39;.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/cmudict.jl#L64-L71">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.train-Tuple{}" href="#Flux.Data.Sentiment.train-Tuple{}"><code>Flux.Data.Sentiment.train</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">train()</code></pre><p>Return the train split of the Stanford Sentiment Treebank. 
The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/sentiment.jl#L43-L48">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.test-Tuple{}" href="#Flux.Data.Sentiment.test-Tuple{}"><code>Flux.Data.Sentiment.test</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">test()</code></pre><p>Return the test split of the Stanford Sentiment Treebank. The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/sentiment.jl#L51-L56">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Data.Sentiment.dev-Tuple{}" href="#Flux.Data.Sentiment.dev-Tuple{}"><code>Flux.Data.Sentiment.dev</code></a> — <span class="docstring-category">Method</span></header><section><div><pre><code class="language-julia">dev()</code></pre><p>Return the dev split of the Stanford Sentiment Treebank. 
The data is in <a href="https://en.wikipedia.org/wiki/Treebank">treebank</a> format.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/data/sentiment.jl#L59-L64">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../performance/">« Performance Tips</a><a class="docs-footer-nextpage" href="../community/">Community »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/ecosystem/index.html b/dev/ecosystem/index.html
index c0d63d0a..b6956b99 100644
--- a/dev/ecosystem/index.html
+++ b/dev/ecosystem/index.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview', {'page': location.pathname + location.search + location.hash});
-</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li class="is-active"><a class="tocitem" href>The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li><a class="tocitem" href="../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>The Julia Ecosystem</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>The Julia Ecosystem</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/ecosystem.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" 
id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="The-Julia-Ecosystem-1"><a class="docs-heading-anchor" href="#The-Julia-Ecosystem-1">The Julia Ecosystem</a><a class="docs-heading-anchor-permalink" href="#The-Julia-Ecosystem-1" title="Permalink"></a></h1><p>One of the main strengths of Julia lies in an ecosystem of packages  globally providing a rich and consistent user experience.</p><p>This is a non-exhaustive list of Julia packages, nicely complementing <code>Flux</code> in typical machine learning and deep learning workflows:</p><ul><li><a href="https://github.com/carlobaldassi/ArgParse.jl">ArgParse.jl</a>: package for parsing command-line arguments to Julia programs.</li><li><a href="https://github.com/Evizero/Augmentor.jl">Augmentor.jl</a>: a fast image augmentation library in Julia for machine learning.</li><li><a href="https://github.com/JuliaIO/BSON.jl">BSON.jl</a>: package for working with the Binary JSON serialisation format</li><li><a href="https://github.com/joshday/OnlineStats.jl">DataFrames.jl</a>: in-memory tabular data in Julia</li><li><a href="https://github.com/JuliaDynamics/DrWatson.jl">DrWatson.jl</a>:  a scientific project assistant software</li><li><a href="https://github.com/JuliaML/MLDatasets.jl">MLDatasets.jl</a>: utility package for accessing common machine learning datasets</li><li><a href="https://github.com/joshday/OnlineStats.jl">OnlineStats.jl</a>: single-pass algorithms for statistics</li><li><a href="https://github.com/mauro3/Parameters.jl">Parameters.jl</a>: types with default field values, keyword constructors and (un-)pack macros</li><li><a href="https://github.com/timholy/ProgressMeter.jl">ProgressMeters.jl</a>: progress meters for long-running computations</li><li><a href="https://github.com/PhilipVinc/TensorBoardLogger.jl">TensorBoardLogger.jl</a>: easy peasy logging to <a href="https://www.tensorflow.org/tensorboard">tensorboard</a> in Julia</li></ul><p>This 
tight integration among Julia pakages is shown in some of the examples in the <a href="https://github.com/FluxML/model-zoo">model-zoo</a> repository.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../saving/">« Saving &amp; Loading</a><a class="docs-footer-nextpage" href="../utilities/">Utility Functions »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li class="is-active"><a class="tocitem" href>The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li><a class="tocitem" href="../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>The Julia Ecosystem</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>The Julia Ecosystem</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/ecosystem.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" 
id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="The-Julia-Ecosystem-1"><a class="docs-heading-anchor" href="#The-Julia-Ecosystem-1">The Julia Ecosystem</a><a class="docs-heading-anchor-permalink" href="#The-Julia-Ecosystem-1" title="Permalink"></a></h1><p>One of the main strengths of Julia lies in an ecosystem of packages  globally providing a rich and consistent user experience.</p><p>This is a non-exhaustive list of Julia packages, nicely complementing <code>Flux</code> in typical machine learning and deep learning workflows:</p><ul><li><a href="https://github.com/carlobaldassi/ArgParse.jl">ArgParse.jl</a>: package for parsing command-line arguments to Julia programs.</li><li><a href="https://github.com/Evizero/Augmentor.jl">Augmentor.jl</a>: a fast image augmentation library in Julia for machine learning.</li><li><a href="https://github.com/JuliaIO/BSON.jl">BSON.jl</a>: package for working with the Binary JSON serialisation format</li><li><a href="https://github.com/JuliaData/DataFrames.jl">DataFrames.jl</a>: in-memory tabular data in Julia</li><li><a href="https://github.com/JuliaDynamics/DrWatson.jl">DrWatson.jl</a>:  a scientific project assistant software</li><li><a href="https://github.com/JuliaML/MLDatasets.jl">MLDatasets.jl</a>: utility package for accessing common machine learning datasets</li><li><a href="https://github.com/joshday/OnlineStats.jl">OnlineStats.jl</a>: single-pass algorithms for statistics</li><li><a href="https://github.com/mauro3/Parameters.jl">Parameters.jl</a>: types with default field values, keyword constructors and (un-)pack macros</li><li><a href="https://github.com/timholy/ProgressMeter.jl">ProgressMeter.jl</a>: progress meters for long-running computations</li><li><a href="https://github.com/PhilipVinc/TensorBoardLogger.jl">TensorBoardLogger.jl</a>: easy peasy logging to <a href="https://www.tensorflow.org/tensorboard">tensorboard</a> in Julia</li></ul><p>This 
tight integration among Julia packages is shown in some of the examples in the <a href="https://github.com/FluxML/model-zoo">model-zoo</a> repository.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../saving/">« Saving &amp; Loading</a><a class="docs-footer-nextpage" href="../utilities/">Utility Functions »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/gpu/index.html b/dev/gpu/index.html
index fa11e41f..85715590 100644
--- a/dev/gpu/index.html
+++ b/dev/gpu/index.html
@@ -47,4 +47,4 @@ julia&gt; x |&gt; cpu
 10-element Array{Float32,1}:
  0.235164
  ⋮
- 0.192538</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../training/training/">« Training</a><a class="docs-footer-nextpage" href="../saving/">Saving &amp; Loading »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ 0.192538</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../training/training/">« Training</a><a class="docs-footer-nextpage" href="../saving/">Saving &amp; Loading »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/index.html b/dev/index.html
index fc7c7a02..34d9e229 100644
--- a/dev/index.html
+++ b/dev/index.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview', {'page': location.pathname + location.search + location.hash});
-</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Home</a><ul class="internal"><li><a class="tocitem" href="#Installation-1"><span>Installation</span></a></li><li><a class="tocitem" href="#Learning-Flux-1"><span>Learning Flux</span></a></li></ul></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="models/basics/">Basics</a></li><li><a class="tocitem" href="models/recurrence/">Recurrence</a></li><li><a 
class="tocitem" href="models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="models/layers/">Model Reference</a></li><li><a class="tocitem" href="models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="training/training/">Training</a></li></ul></li><li><a class="tocitem" href="gpu/">GPU Support</a></li><li><a class="tocitem" href="saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="utilities/">Utility Functions</a></li><li><a class="tocitem" href="performance/">Performance Tips</a></li><li><a class="tocitem" href="datasets/">Datasets</a></li><li><a class="tocitem" href="community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Home</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Home</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/index.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" 
title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Flux:-The-Julia-Machine-Learning-Library-1"><a class="docs-heading-anchor" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a><a class="docs-heading-anchor-permalink" href="#Flux:-The-Julia-Machine-Learning-Library-1" title="Permalink"></a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles:</p><ul><li><strong>Doing the obvious thing</strong>. Flux has relatively few explicit APIs for features like regularisation or embeddings. Instead, writing down the mathematical form will work – and be fast.</li><li><strong>You could have written Flux</strong>. All of it, from <a href="https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131">LSTMs</a> to <a href="https://github.com/JuliaGPU/CuArrays.jl">GPU kernels</a>, is straightforward Julia code. When in doubt, it’s well worth looking at <a href="https://github.com/FluxML/Flux.jl/">the source</a>. If you need something different, you can easily roll your own.</li><li><strong>Play nicely with others</strong>. 
Flux works well with Julia libraries from <a href="https://github.com/JuliaComputing/JuliaDB.jl">data frames</a> and <a href="https://github.com/JuliaImages/Images.jl">images</a> to <a href="https://github.com/JuliaDiffEq/DifferentialEquations.jl">differential equation solvers</a>, so you can easily build complex data processing pipelines that integrate Flux models.</li></ul><h2 id="Installation-1"><a class="docs-heading-anchor" href="#Installation-1">Installation</a><a class="docs-heading-anchor-permalink" href="#Installation-1" title="Permalink"></a></h2><p>Download <a href="https://julialang.org/">Julia 1.0</a> or later, if you haven&#39;t already. You can add Flux from using Julia&#39;s package manager, by typing <code>] add Flux</code> in the Julia prompt.</p><p>If you have CUDA you can also run <code>] add CuArrays</code> to get GPU support; see <a href="gpu/">here</a> for more details.</p><h2 id="Learning-Flux-1"><a class="docs-heading-anchor" href="#Learning-Flux-1">Learning Flux</a><a class="docs-heading-anchor-permalink" href="#Learning-Flux-1" title="Permalink"></a></h2><p>There are several different ways to learn Flux. If you just want to get started writing models, the <a href="https://github.com/FluxML/model-zoo/">model zoo</a> gives good starting points for many common ones. This documentation provides a reference to all of Flux&#39;s APIs, as well as a from-scratch introduction to Flux&#39;s take on models and how they work. 
Once you understand these docs, congratulations, you also understand <a href="https://github.com/FluxML/Flux.jl">Flux&#39;s source code</a>, which is intended to be concise, legible and a good reference for more advanced concepts.</p></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="models/basics/">Basics »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li class="is-active"><a class="tocitem" href>Home</a><ul class="internal"><li><a class="tocitem" href="#Installation-1"><span>Installation</span></a></li><li><a class="tocitem" href="#Learning-Flux-1"><span>Learning Flux</span></a></li></ul></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="models/basics/">Basics</a></li><li><a class="tocitem" href="models/recurrence/">Recurrence</a></li><li><a 
class="tocitem" href="models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="models/layers/">Model Reference</a></li><li><a class="tocitem" href="models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="training/training/">Training</a></li></ul></li><li><a class="tocitem" href="gpu/">GPU Support</a></li><li><a class="tocitem" href="saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="utilities/">Utility Functions</a></li><li><a class="tocitem" href="performance/">Performance Tips</a></li><li><a class="tocitem" href="datasets/">Datasets</a></li><li><a class="tocitem" href="community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Home</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Home</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/index.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" 
title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Flux:-The-Julia-Machine-Learning-Library-1"><a class="docs-heading-anchor" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a><a class="docs-heading-anchor-permalink" href="#Flux:-The-Julia-Machine-Learning-Library-1" title="Permalink"></a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles:</p><ul><li><strong>Doing the obvious thing</strong>. Flux has relatively few explicit APIs for features like regularisation or embeddings. Instead, writing down the mathematical form will work – and be fast.</li><li><strong>You could have written Flux</strong>. All of it, from <a href="https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131">LSTMs</a> to <a href="https://github.com/JuliaGPU/CuArrays.jl">GPU kernels</a>, is straightforward Julia code. When in doubt, it’s well worth looking at <a href="https://github.com/FluxML/Flux.jl/">the source</a>. If you need something different, you can easily roll your own.</li><li><strong>Play nicely with others</strong>. 
Flux works well with Julia libraries from <a href="https://github.com/JuliaComputing/JuliaDB.jl">data frames</a> and <a href="https://github.com/JuliaImages/Images.jl">images</a> to <a href="https://github.com/JuliaDiffEq/DifferentialEquations.jl">differential equation solvers</a>, so you can easily build complex data processing pipelines that integrate Flux models.</li></ul><h2 id="Installation-1"><a class="docs-heading-anchor" href="#Installation-1">Installation</a><a class="docs-heading-anchor-permalink" href="#Installation-1" title="Permalink"></a></h2><p>Download <a href="https://julialang.org/">Julia 1.0</a> or later, if you haven&#39;t already. You can add Flux from using Julia&#39;s package manager, by typing <code>] add Flux</code> in the Julia prompt.</p><p>If you have CUDA you can also run <code>] add CuArrays</code> to get GPU support; see <a href="gpu/">here</a> for more details.</p><h2 id="Learning-Flux-1"><a class="docs-heading-anchor" href="#Learning-Flux-1">Learning Flux</a><a class="docs-heading-anchor-permalink" href="#Learning-Flux-1" title="Permalink"></a></h2><p>There are several different ways to learn Flux. If you just want to get started writing models, the <a href="https://github.com/FluxML/model-zoo/">model zoo</a> gives good starting points for many common ones. This documentation provides a reference to all of Flux&#39;s APIs, as well as a from-scratch introduction to Flux&#39;s take on models and how they work. 
Once you understand these docs, congratulations, you also understand <a href="https://github.com/FluxML/Flux.jl">Flux&#39;s source code</a>, which is intended to be concise, legible and a good reference for more advanced concepts.</p></article><nav class="docs-footer"><a class="docs-footer-nextpage" href="models/basics/">Basics »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/advanced/index.html b/dev/models/advanced/index.html
index cdac8905..a2579b73 100644
--- a/dev/models/advanced/index.html
+++ b/dev/models/advanced/index.html
@@ -24,4 +24,4 @@ Params([[0.66722 0.774872 0.249809; 0.843321 0.403843 0.429232; 0.683525 0.66245
     )
 
 ps = Flux.params(m[3:end])</code></pre><p>The <code>Zygote.Params</code> object <code>ps</code> now holds a reference to only the parameters of the layers passed to it.</p><p>During training, the gradients will only be computed for (and applied to) the last <code>Dense</code> layer, therefore only that would have its parameters changed.</p><p><code>Flux.params</code> also takes multiple inputs to make it easy to collect parameters from heterogenous models with a single call. A simple demonstration would be if we wanted to omit optimising the second <code>Dense</code> layer in the previous example. It would look something like this:</p><pre><code class="language-julia">Flux.params(m[1], m[3:end])</code></pre><p>Sometimes, a more fine-tuned control is needed.  We can freeze a specific parameter of a specific layer which already entered a <code>Params</code> object <code>ps</code>,  by simply deleting it from <code>ps</code>:</p><pre><code class="language-julia">ps = params(m)
-delete!(ps, m[2].b) </code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../layers/">« Model Reference</a><a class="docs-footer-nextpage" href="../nnlib/">NNlib »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+delete!(ps, m[2].b) </code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../layers/">« Model Reference</a><a class="docs-footer-nextpage" href="../nnlib/">NNlib »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/basics/index.html b/dev/models/basics/index.html
index b7448ba7..4858f238 100644
--- a/dev/models/basics/index.html
+++ b/dev/models/basics/index.html
@@ -109,8 +109,8 @@ model2(rand(10)) # =&gt; 2-element vector</code></pre><p>This quickly starts to
 m(rand(10))</code></pre><p>Likewise, <code>Chain</code> will happily work with any Julia function.</p><pre><code class="language-julia">m = Chain(x -&gt; x^2, x -&gt; x+1)
 
 m(5) # =&gt; 26</code></pre><h2 id="Layer-helpers-1"><a class="docs-heading-anchor" href="#Layer-helpers-1">Layer helpers</a><a class="docs-heading-anchor-permalink" href="#Layer-helpers-1" title="Permalink"></a></h2><p>Flux provides a set of helpers for custom layers, which you can enable by calling</p><pre><code class="language-julia">Flux.@functor Affine</code></pre><p>This enables a useful extra set of functionality for our <code>Affine</code> layer, such as <a href="../../training/optimisers/">collecting its parameters</a> or <a href="../../gpu/">moving it to the GPU</a>.</p><p>For some more helpful tricks, including parameter freezing, please checkout the <a href="../advanced/">advanced usage guide</a>.</p><h2 id="Utility-functions-1"><a class="docs-heading-anchor" href="#Utility-functions-1">Utility functions</a><a class="docs-heading-anchor-permalink" href="#Utility-functions-1" title="Permalink"></a></h2><p>Flux provides some utility functions to help you generate models in an automated fashion.</p><p><code>outdims</code> enables you to calculate the spatial output dimensions of layers like <code>Conv</code> when applied to input images of a given size. Currently limited to the following layers:</p><ul><li><code>Chain</code></li><li><code>Dense</code></li><li><code>Conv</code></li><li><code>Diagonal</code></li><li><code>Maxout</code></li><li><code>ConvTranspose</code></li><li><code>DepthwiseConv</code></li><li><code>CrossCor</code></li><li><code>MaxPool</code></li><li><code>MeanPool</code></li></ul><article class="docstring"><header><a class="docstring-binding" id="Flux.outdims" href="#Flux.outdims"><code>Flux.outdims</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">outdims(c::Chain, isize)</code></pre><p>Calculate the output dimensions given the input dimensions, <code>isize</code>.</p><pre><code class="language-julia">m = Chain(Conv((3, 3), 3 =&gt; 16), Conv((3, 3), 16 =&gt; 32))
-outdims(m, (10, 10)) == (6, 6)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L50-L59">source</a></section><section><div><pre><code class="language-none">outdims(l::Dense, isize)</code></pre><p>Calculate the output dimensions given the input dimensions, <code>isize</code>.</p><pre><code class="language-julia">m = Dense(10, 5)
+outdims(m, (10, 10)) == (6, 6)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L50-L59">source</a></section><section><div><pre><code class="language-none">outdims(l::Dense, isize)</code></pre><p>Calculate the output dimensions given the input dimensions, <code>isize</code>.</p><pre><code class="language-julia">m = Dense(10, 5)
 outdims(m, (5, 2)) == (5,)
-outdims(m, (10,)) == (5,)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L139-L149">source</a></section><section><div><pre><code class="language-none">outdims(l::Conv, isize::Tuple)</code></pre><p>Calculate the output dimensions given the input dimensions <code>isize</code>. Batch size and channel size are ignored as per <a href="https://github.com/FluxML/NNlib.jl">NNlib.jl</a>.</p><pre><code class="language-julia">m = Conv((3, 3), 3 =&gt; 16)
+outdims(m, (10,)) == (5,)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L139-L149">source</a></section><section><div><pre><code class="language-none">outdims(l::Conv, isize::Tuple)</code></pre><p>Calculate the output dimensions given the input dimensions <code>isize</code>. Batch size and channel size are ignored as per <a href="https://github.com/FluxML/NNlib.jl">NNlib.jl</a>.</p><pre><code class="language-julia">m = Conv((3, 3), 3 =&gt; 16)
 outdims(m, (10, 10)) == (8, 8)
-outdims(m, (10, 10, 1, 3)) == (8, 8)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L153-L164">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../">« Home</a><a class="docs-footer-nextpage" href="../recurrence/">Recurrence »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+outdims(m, (10, 10, 1, 3)) == (8, 8)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L153-L164">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../">« Home</a><a class="docs-footer-nextpage" href="../recurrence/">Recurrence »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/layers/index.html b/dev/models/layers/index.html
index c602f18a..ded7ec64 100644
--- a/dev/models/layers/index.html
+++ b/dev/models/layers/index.html
@@ -16,7 +16,7 @@ julia&gt; m = Chain(Dense(10, 5), Dense(5, 2));
 julia&gt; x = rand(10);
 
 julia&gt; m(x) == m[2](m[1](x))
-true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L1-L24">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Create a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>out</code>.</p><p><strong>Examples</strong></p><p>```jldoctest; setup = :(using Random; Random.seed!(0)) julia&gt; d = Dense(5, 2) Dense(5, 2)</p><p>julia&gt; d(rand(5)) 2-element Array{Float32,1}:   -0.16210233    0.12311903```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L85-L104">source</a></section></article><h2 id="Convolution-and-Pooling-Layers-1"><a class="docs-heading-anchor" href="#Convolution-and-Pooling-Layers-1">Convolution and Pooling Layers</a><a class="docs-heading-anchor-permalink" href="#Convolution-and-Pooling-Layers-1" title="Permalink"></a></h2><p>These layers are used to build convolutional neural networks (CNNs).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Conv" href="#Flux.Conv"><code>Flux.Conv</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Conv(filter, in =&gt; out, σ = identity; init = glorot_uniform,
+true</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L1-L24">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Create a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>out</code>.</p><p><strong>Examples</strong></p><p>```jldoctest; setup = :(using Random; Random.seed!(0)) julia&gt; d = Dense(5, 2) Dense(5, 2)</p><p>julia&gt; d(rand(5)) 2-element Array{Float32,1}:   -0.16210233    0.12311903```</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L85-L104">source</a></section></article><h2 id="Convolution-and-Pooling-Layers-1"><a class="docs-heading-anchor" href="#Convolution-and-Pooling-Layers-1">Convolution and Pooling Layers</a><a class="docs-heading-anchor-permalink" href="#Convolution-and-Pooling-Layers-1" title="Permalink"></a></h2><p>These layers are used to build convolutional neural networks (CNNs).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Conv" href="#Flux.Conv"><code>Flux.Conv</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Conv(filter, in =&gt; out, σ = identity; init = glorot_uniform,
      stride = 1, pad = 0, dilation = 1)
 
 filter = (2,2)
@@ -25,30 +25,30 @@ out = 16
 Conv((2, 2), 1=&gt;16, relu)</code></pre><p>Standard convolutional layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p><p><strong>Examples</strong></p><p>Apply a <code>Conv</code> layer to a 1-channel input using a 2×2 window filter size, giving us a 16-channel output. Output is activated with ReLU.</p><pre><code class="language-julia">filter = (2,2)
 in = 1
 out = 16
-Conv(filter, in =&gt; out, relu)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L32-L64">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.MaxPool" href="#Flux.MaxPool"><code>Flux.MaxPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">MaxPool(k; pad = 0, stride = k)</code></pre><p>Max pooling layer. <code>k</code> is the size of the window for each dimension of the input.</p><p><strong>Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</strong></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L533-L540">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GlobalMaxPool" href="#Flux.GlobalMaxPool"><code>Flux.GlobalMaxPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GlobalMaxPool()</code></pre><p>Global max pooling layer.</p><p>Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output, by performing max pooling on the complete (w,h)-shaped feature maps.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L483-L490">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.MeanPool" href="#Flux.MeanPool"><code>Flux.MeanPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">MeanPool(k; pad = 0, stride = k)</code></pre><p>Mean pooling layer. 
<code>k</code> is the size of the window for each dimension of the input.</p><p>Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L564-L570">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GlobalMeanPool" href="#Flux.GlobalMeanPool"><code>Flux.GlobalMeanPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GlobalMeanPool()</code></pre><p>Global mean pooling layer.</p><p>Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output, by performing mean pooling on the complete (w,h)-shaped feature maps.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L508-L515">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.DepthwiseConv" href="#Flux.DepthwiseConv"><code>Flux.DepthwiseConv</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">DepthwiseConv(filter::Tuple, in=&gt;out)
+Conv(filter, in =&gt; out, relu)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L32-L64">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.MaxPool" href="#Flux.MaxPool"><code>Flux.MaxPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">MaxPool(k; pad = 0, stride = k)</code></pre><p>Max pooling layer. <code>k</code> is the size of the window for each dimension of the input.</p><p><strong>Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</strong></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L533-L540">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GlobalMaxPool" href="#Flux.GlobalMaxPool"><code>Flux.GlobalMaxPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GlobalMaxPool()</code></pre><p>Global max pooling layer.</p><p>Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output, by performing max pooling on the complete (w,h)-shaped feature maps.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L483-L490">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.MeanPool" href="#Flux.MeanPool"><code>Flux.MeanPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">MeanPool(k; pad = 0, stride = k)</code></pre><p>Mean pooling layer. 
<code>k</code> is the size of the window for each dimension of the input.</p><p>Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L564-L570">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GlobalMeanPool" href="#Flux.GlobalMeanPool"><code>Flux.GlobalMeanPool</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GlobalMeanPool()</code></pre><p>Global mean pooling layer.</p><p>Transforms (w,h,c,b)-shaped input into (1,1,c,b)-shaped output, by performing mean pooling on the complete (w,h)-shaped feature maps.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L508-L515">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.DepthwiseConv" href="#Flux.DepthwiseConv"><code>Flux.DepthwiseConv</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">DepthwiseConv(filter::Tuple, in=&gt;out)
 DepthwiseConv(filter::Tuple, in=&gt;out, activation)
 DepthwiseConv(filter, in =&gt; out, σ = identity; init = glorot_uniform,
-              stride = 1, pad = 0, dilation = 1)</code></pre><p>Depthwise convolutional layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively. Note that <code>out</code> must be an integer multiple of <code>in</code>.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L271-L290">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.ConvTranspose" href="#Flux.ConvTranspose"><code>Flux.ConvTranspose</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ConvTranspose(filter, in=&gt;out)
+              stride = 1, pad = 0, dilation = 1)</code></pre><p>Depthwise convolutional layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively. Note that <code>out</code> must be an integer multiple of <code>in</code>.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L271-L290">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.ConvTranspose" href="#Flux.ConvTranspose"><code>Flux.ConvTranspose</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ConvTranspose(filter, in=&gt;out)
 ConvTranspose(filter, in=&gt;out, activation)
 ConvTranspose(filter, in =&gt; out, σ = identity; init = glorot_uniform,
-              stride = 1, pad = 0, dilation = 1)</code></pre><p>Standard convolutional transpose layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == stride * inputsize - stride + 1.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L168-L186">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.CrossCor" href="#Flux.CrossCor"><code>Flux.CrossCor</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">CrossCor(filter, in=&gt;out)
+              stride = 1, pad = 0, dilation = 1)</code></pre><p>Standard convolutional transpose layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == stride * inputsize - stride + 1.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L168-L186">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.CrossCor" href="#Flux.CrossCor"><code>Flux.CrossCor</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">CrossCor(filter, in=&gt;out)
 CrossCor(filter, in=&gt;out, activation)
 CrossCor(filter, in =&gt; out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)</code></pre><p>Standard cross convolutional layer. <code>filter</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Data should be stored in WHCN order (width, height, # channels, batch size). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Accepts keyword arguments <code>weight</code> and <code>bias</code> to set the corresponding fields. Setting <code>bias</code> to <code>Flux.Zeros()</code> will switch bias off for the layer.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>. Use <code>pad=SamePad()</code> to apply padding so that outputsize == inputsize / stride.</p><p><strong>Examples</strong></p><p>Apply a <code>CrossCor</code> layer to a 1-channel input using a 2×2 window filter size, giving us a 16-channel output. Output is activated with ReLU.</p><pre><code class="language-julia">filter = (2,2)
 in = 1
 out = 16
-CrossCor((2, 2), 1=&gt;16, relu)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/conv.jl#L379-L408">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.flatten" href="#Flux.flatten"><code>Flux.flatten</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">flatten(x::AbstractArray)</code></pre><p>Transform (w, h, c, b)-shaped input into (w × h × c, b)-shaped output by linearizing all values for each element in the batch.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L256-L261">source</a></section></article><h2 id="Recurrent-Layers-1"><a class="docs-heading-anchor" href="#Recurrent-Layers-1">Recurrent Layers</a><a class="docs-heading-anchor-permalink" href="#Recurrent-Layers-1" title="Permalink"></a></h2><p>Much like the core layers above, but can be used to process sequence data (as well as other kinds of structured data).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.RNN" href="#Flux.RNN"><code>Flux.RNN</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">RNN(in::Integer, out::Integer, σ = tanh)</code></pre><p>The most basic recurrent layer; essentially acts as a <code>Dense</code> layer, but with the output fed back into the input each time step.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/recurrent.jl#L91-L96">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.LSTM" href="#Flux.LSTM"><code>Flux.LSTM</code></a> — <span 
class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">LSTM(in::Integer, out::Integer)</code></pre><p><a href="https://www.researchgate.net/publication/13853244_Long_Short-term_Memory">Long Short Term Memory</a> recurrent layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/recurrent.jl#L136-L144">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GRU" href="#Flux.GRU"><code>Flux.GRU</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">GRU(in::Integer, out::Integer)</code></pre><p><a href="https://arxiv.org/abs/1406.1078">Gated Recurrent Unit</a> layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/recurrent.jl#L177-L185">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Recur" href="#Flux.Recur"><code>Flux.Recur</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Recur(cell)</code></pre><p><code>Recur</code> takes a recurrent cell and makes it stateful, managing the hidden state in the background. 
<code>cell</code> should be a model of the form:</p><pre><code class="language-none">h, y = cell(h, x...)</code></pre><p>For example, here&#39;s a recurrent network that keeps a running total of its inputs:</p><pre><code class="language-julia">accum(h, x) = (h + x, x)
+CrossCor((2, 2), 1=&gt;16, relu)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/conv.jl#L379-L408">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.flatten" href="#Flux.flatten"><code>Flux.flatten</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">flatten(x::AbstractArray)</code></pre><p>Transform (w, h, c, b)-shaped input into (w × h × c, b)-shaped output by linearizing all values for each element in the batch.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L256-L261">source</a></section></article><h2 id="Recurrent-Layers-1"><a class="docs-heading-anchor" href="#Recurrent-Layers-1">Recurrent Layers</a><a class="docs-heading-anchor-permalink" href="#Recurrent-Layers-1" title="Permalink"></a></h2><p>Much like the core layers above, but can be used to process sequence data (as well as other kinds of structured data).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.RNN" href="#Flux.RNN"><code>Flux.RNN</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">RNN(in::Integer, out::Integer, σ = tanh)</code></pre><p>The most basic recurrent layer; essentially acts as a <code>Dense</code> layer, but with the output fed back into the input each time step.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/recurrent.jl#L91-L96">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.LSTM" href="#Flux.LSTM"><code>Flux.LSTM</code></a> — <span 
class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">LSTM(in::Integer, out::Integer)</code></pre><p><a href="https://www.researchgate.net/publication/13853244_Long_Short-term_Memory">Long Short Term Memory</a> recurrent layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/recurrent.jl#L136-L144">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GRU" href="#Flux.GRU"><code>Flux.GRU</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">GRU(in::Integer, out::Integer)</code></pre><p><a href="https://arxiv.org/abs/1406.1078">Gated Recurrent Unit</a> layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/recurrent.jl#L177-L185">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Recur" href="#Flux.Recur"><code>Flux.Recur</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Recur(cell)</code></pre><p><code>Recur</code> takes a recurrent cell and makes it stateful, managing the hidden state in the background. 
<code>cell</code> should be a model of the form:</p><pre><code class="language-none">h, y = cell(h, x...)</code></pre><p>For example, here&#39;s a recurrent network that keeps a running total of its inputs:</p><pre><code class="language-julia">accum(h, x) = (h + x, x)
 rnn = Flux.Recur(accum, 0)
 rnn(2)      # 2
 rnn(3)      # 3
 rnn.state   # 5
 rnn.(1:10)  # apply to a sequence
-rnn.state   # 60</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/recurrent.jl#L7-L26">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.reset!" href="#Flux.reset!"><code>Flux.reset!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">reset!(rnn)</code></pre><p>Reset the hidden state of a recurrent layer back to its original value.</p><p>Assuming you have a <code>Recur</code> layer <code>rnn</code>, this is roughly equivalent to:</p><pre><code class="language-julia">rnn.state = hidden(rnn.cell)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/recurrent.jl#L45-L54">source</a></section></article><h2 id="Other-General-Purpose-Layers-1"><a class="docs-heading-anchor" href="#Other-General-Purpose-Layers-1">Other General Purpose Layers</a><a class="docs-heading-anchor-permalink" href="#Other-General-Purpose-Layers-1" title="Permalink"></a></h2><p>These are marginally more obscure than the Basic Layers. But in contrast to the layers described in the other sections are not readily grouped around a particular purpose (e.g. CNNs or RNNs).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Maxout" href="#Flux.Maxout"><code>Flux.Maxout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Maxout(over)</code></pre><p>The <a href="https://arxiv.org/pdf/1302.4389.pdf">Maxout</a> layer has a number of internal layers which all receive the same input. 
It returns the elementwise maximum of the internal layers&#39; outputs.</p><p>Maxout over linear dense layers satisfies the univeral approximation theorem.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L183-L191">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.SkipConnection" href="#Flux.SkipConnection"><code>Flux.SkipConnection</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">SkipConnection(layer, connection)</code></pre><p>Create a skip connection which consists of a layer or <code>Chain</code> of consecutive layers and a shortcut connection linking the block&#39;s input to the output through a user-supplied 2-argument callable. The first argument to the callable will be propagated through the given <code>layer</code> while the second is the unchanged, &quot;skipped&quot; input.</p><p>The simplest &quot;ResNet&quot;-type connection is just <code>SkipConnection(layer, +)</code>, and requires the output of the layers to be the same shape as the input. Here is a more complicated example:</p><pre><code class="language-julia">m = Conv((3,3), 4=&gt;7, pad=(1,1))
+rnn.state   # 60</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/recurrent.jl#L7-L26">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.reset!" href="#Flux.reset!"><code>Flux.reset!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">reset!(rnn)</code></pre><p>Reset the hidden state of a recurrent layer back to its original value.</p><p>Assuming you have a <code>Recur</code> layer <code>rnn</code>, this is roughly equivalent to:</p><pre><code class="language-julia">rnn.state = hidden(rnn.cell)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/recurrent.jl#L45-L54">source</a></section></article><h2 id="Other-General-Purpose-Layers-1"><a class="docs-heading-anchor" href="#Other-General-Purpose-Layers-1">Other General Purpose Layers</a><a class="docs-heading-anchor-permalink" href="#Other-General-Purpose-Layers-1" title="Permalink"></a></h2><p>These are marginally more obscure than the Basic Layers. But in contrast to the layers described in the other sections are not readily grouped around a particular purpose (e.g. CNNs or RNNs).</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Maxout" href="#Flux.Maxout"><code>Flux.Maxout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Maxout(over)</code></pre><p>The <a href="https://arxiv.org/pdf/1302.4389.pdf">Maxout</a> layer has a number of internal layers which all receive the same input. 
It returns the elementwise maximum of the internal layers&#39; outputs.</p><p>Maxout over linear dense layers satisfies the universal approximation theorem.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L183-L191">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.SkipConnection" href="#Flux.SkipConnection"><code>Flux.SkipConnection</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">SkipConnection(layer, connection)</code></pre><p>Create a skip connection which consists of a layer or <code>Chain</code> of consecutive layers and a shortcut connection linking the block&#39;s input to the output through a user-supplied 2-argument callable. The first argument to the callable will be propagated through the given <code>layer</code> while the second is the unchanged, &quot;skipped&quot; input.</p><p>The simplest &quot;ResNet&quot;-type connection is just <code>SkipConnection(layer, +)</code>, and requires the output of the layers to be the same shape as the input. Here is a more complicated example:</p><pre><code class="language-julia">m = Conv((3,3), 4=&gt;7, pad=(1,1))
 x = ones(5,5,4,10);
 size(m(x)) == (5, 5, 7, 10)
 
 sm = SkipConnection(m, (mx, x) -&gt; cat(mx, x, dims=3))
-size(sm(x)) == (5, 5, 11, 10)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L226-L246">source</a></section></article><h2 id="Normalisation-and-Regularisation-1"><a class="docs-heading-anchor" href="#Normalisation-and-Regularisation-1">Normalisation &amp; Regularisation</a><a class="docs-heading-anchor-permalink" href="#Normalisation-and-Regularisation-1" title="Permalink"></a></h2><p>These layers don&#39;t affect the structure of the network but may improve training times or reduce overfitting.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.normalise" href="#Flux.normalise"><code>Flux.normalise</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">normalise(x; dims=1)</code></pre><p>Normalise <code>x</code> to mean 0 and standard deviation 1 across the dimensions given by <code>dims</code>. Defaults to normalising over columns.</p><pre><code class="language-julia-repl">julia&gt; a = reshape(collect(1:9), 3, 3)
+size(sm(x)) == (5, 5, 11, 10)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L226-L246">source</a></section></article><h2 id="Normalisation-and-Regularisation-1"><a class="docs-heading-anchor" href="#Normalisation-and-Regularisation-1">Normalisation &amp; Regularisation</a><a class="docs-heading-anchor-permalink" href="#Normalisation-and-Regularisation-1" title="Permalink"></a></h2><p>These layers don&#39;t affect the structure of the network but may improve training times or reduce overfitting.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.normalise" href="#Flux.normalise"><code>Flux.normalise</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">normalise(x; dims=1)</code></pre><p>Normalise <code>x</code> to mean 0 and standard deviation 1 across the dimensions given by <code>dims</code>. Defaults to normalising over columns.</p><pre><code class="language-julia-repl">julia&gt; a = reshape(collect(1:9), 3, 3)
 3×3 Array{Int64,2}:
  1  4  7
  2  5  8
@@ -64,35 +64,35 @@ julia&gt; Flux.normalise(a, dims=2)
 3×3 Array{Float64,2}:
  -1.22474  0.0  1.22474
  -1.22474  0.0  1.22474
- -1.22474  0.0  1.22474</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L153-L178">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.BatchNorm" href="#Flux.BatchNorm"><code>Flux.BatchNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">BatchNorm(channels::Integer, σ = identity;
+ -1.22474  0.0  1.22474</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L153-L178">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.BatchNorm" href="#Flux.BatchNorm"><code>Flux.BatchNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">BatchNorm(channels::Integer, σ = identity;
           initβ = zeros, initγ = ones,
           ϵ = 1e-8, momentum = .1)</code></pre><p><a href="https://arxiv.org/pdf/1502.03167.pdf">Batch Normalization</a> layer. <code>channels</code> should be the size of the channel dimension in your data (see below).</p><p>Given an array with <code>N</code> dimensions, call the <code>N-1</code>th the channel dimension. (For a batch of feature vectors this is just the data dimension, for <code>WHCN</code> images it&#39;s the usual channel dimension.)</p><p><code>BatchNorm</code> computes the mean and variance for each each <code>W×H×1×N</code> slice and shifts them to have a new mean and variance (corresponding to the learnable, per-channel <code>bias</code> and <code>scale</code> parameters).</p><p>Use <a href="#Flux.testmode!"><code>testmode!</code></a> during inference.</p><p><strong>Examples</strong></p><pre><code class="language-julia">m = Chain(
   Dense(28^2, 64),
   BatchNorm(64, relu),
   Dense(64, 10),
   BatchNorm(10),
-  softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L122-L149">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.dropout" href="#Flux.dropout"><code>Flux.dropout</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">dropout(x, p; dims = :)</code></pre><p>The dropout function. For each input, either sets that input to <code>0</code> (with probability <code>p</code>) or scales it by <code>1 / (1 - p)</code>. <code>dims</code> specifies the unbroadcasted dimensions, e.g. <code>dims=1</code> applies dropout along columns and <code>dims=2</code> along rows. This is used as a regularisation, i.e. it reduces overfitting during training.</p><p>See also the <a href="#Flux.Dropout"><code>Dropout</code></a> layer.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L12-L21">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Dropout" href="#Flux.Dropout"><code>Flux.Dropout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Dropout(p, dims = :)</code></pre><p>Dropout layer. 
In the forward pass, apply the <a href="#Flux.dropout"><code>Flux.dropout</code></a> function on the input.</p><p>Does nothing to the input once <a href="#Flux.testmode!"><code>Flux.testmode!</code></a> is <code>true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L30-L36">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.AlphaDropout" href="#Flux.AlphaDropout"><code>Flux.AlphaDropout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AlphaDropout(p)</code></pre><p>A dropout layer. Used in <a href="https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf">Self-Normalizing Neural Networks</a>. The AlphaDropout layer ensures that mean and variance of activations remain the same as before.</p><p>Does nothing to the input once <a href="#Flux.testmode!"><code>testmode!</code></a> is true.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L65-L74">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.LayerNorm" href="#Flux.LayerNorm"><code>Flux.LayerNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">LayerNorm(h::Integer)</code></pre><p>A <a href="https://arxiv.org/pdf/1607.06450.pdf">normalisation layer</a> designed to be used with recurrent hidden states of size <code>h</code>. 
Normalises the mean and standard deviation of each input before applying a per-neuron gain/bias.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L100-L106">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.InstanceNorm" href="#Flux.InstanceNorm"><code>Flux.InstanceNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">InstanceNorm(channels::Integer, σ = identity;
+  softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L122-L149">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.dropout" href="#Flux.dropout"><code>Flux.dropout</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">dropout(x, p; dims = :)</code></pre><p>The dropout function. For each input, either sets that input to <code>0</code> (with probability <code>p</code>) or scales it by <code>1 / (1 - p)</code>. <code>dims</code> specifies the unbroadcasted dimensions, e.g. <code>dims=1</code> applies dropout along columns and <code>dims=2</code> along rows. This is used as a regularisation, i.e. it reduces overfitting during training.</p><p>See also the <a href="#Flux.Dropout"><code>Dropout</code></a> layer.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L12-L21">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Dropout" href="#Flux.Dropout"><code>Flux.Dropout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Dropout(p, dims = :)</code></pre><p>Dropout layer. 
In the forward pass, apply the <a href="#Flux.dropout"><code>Flux.dropout</code></a> function on the input.</p><p>Does nothing to the input once <a href="#Flux.testmode!"><code>Flux.testmode!</code></a> is <code>true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L30-L36">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.AlphaDropout" href="#Flux.AlphaDropout"><code>Flux.AlphaDropout</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AlphaDropout(p)</code></pre><p>A dropout layer. Used in <a href="https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf">Self-Normalizing Neural Networks</a>. The AlphaDropout layer ensures that mean and variance of activations remain the same as before.</p><p>Does nothing to the input once <a href="#Flux.testmode!"><code>testmode!</code></a> is true.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L65-L74">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.LayerNorm" href="#Flux.LayerNorm"><code>Flux.LayerNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">LayerNorm(h::Integer)</code></pre><p>A <a href="https://arxiv.org/pdf/1607.06450.pdf">normalisation layer</a> designed to be used with recurrent hidden states of size <code>h</code>. 
Normalises the mean and standard deviation of each input before applying a per-neuron gain/bias.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L100-L106">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.InstanceNorm" href="#Flux.InstanceNorm"><code>Flux.InstanceNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">InstanceNorm(channels::Integer, σ = identity;
              initβ = zeros, initγ = ones,
              ϵ = 1e-8, momentum = .1)</code></pre><p><a href="https://arxiv.org/abs/1607.08022">Instance Normalization</a> layer. <code>channels</code> should be the size of the channel dimension in your data (see below).</p><p>Given an array with <code>N</code> dimensions, call the <code>N-1</code>th the channel dimension. (For a batch of feature vectors this is just the data dimension, for <code>WHCN</code> images it&#39;s the usual channel dimension.)</p><p><code>InstanceNorm</code> computes the mean and variance for each each <code>W×H×1×1</code> slice and shifts them to have a new mean and variance (corresponding to the learnable, per-channel <code>bias</code> and <code>scale</code> parameters).</p><p>Use <a href="#Flux.testmode!"><code>testmode!</code></a> during inference.</p><p><strong>Examples</strong></p><pre><code class="language-julia">m = Chain(
   Dense(28^2, 64),
   InstanceNorm(64, relu),
   Dense(64, 10),
   InstanceNorm(10),
-  softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L228-L255">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GroupNorm" href="#Flux.GroupNorm"><code>Flux.GroupNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GroupNorm(chs::Integer, G::Integer, λ = identity;
+  softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L228-L255">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.GroupNorm" href="#Flux.GroupNorm"><code>Flux.GroupNorm</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">GroupNorm(chs::Integer, G::Integer, λ = identity;
           initβ = (i) -&gt; zeros(Float32, i), initγ = (i) -&gt; ones(Float32, i),
           ϵ = 1f-5, momentum = 0.1f0)</code></pre><p><a href="https://arxiv.org/pdf/1803.08494.pdf">Group Normalization</a> layer. This layer can outperform Batch Normalization and Instance Normalization.</p><p><code>chs</code> is the number of channels, the channel dimension of your input. For an array of N dimensions, the <code>N-1</code>th index is the channel dimension.</p><p><code>G</code> is the number of groups along which the statistics are computed. The number of channels must be an integer multiple of the number of groups.</p><p>Use <a href="#Flux.testmode!"><code>testmode!</code></a> during inference.</p><p><strong>Examples</strong></p><pre><code class="language-julia">m = Chain(Conv((3,3), 1=&gt;32, leakyrelu;pad = 1),
           GroupNorm(32,16))
-          # 32 channels, 16 groups (G = 16), thus 2 channels per group used</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/normalise.jl#L313-L335">source</a></section></article><h3 id="Testmode-1"><a class="docs-heading-anchor" href="#Testmode-1">Testmode</a><a class="docs-heading-anchor-permalink" href="#Testmode-1" title="Permalink"></a></h3><p>Many normalisation layers behave differently under training and inference (testing). By default, Flux will automatically determine when a layer evaluation is part of training or inference. Still, depending on your use case, it may be helpful to manually specify when these layers should be treated as being trained or not. For this, Flux provides <code>Flux.testmode!</code>. When called on a model (e.g. a layer or chain of layers), this function will place the model into the mode specified.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.testmode!" href="#Flux.testmode!"><code>Flux.testmode!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">testmode!(m, mode = true)</code></pre><p>Set a layer or model&#39;s test mode (see below). 
Using <code>:auto</code> mode will treat any gradient computation as training.</p><p><em>Note</em>: if you manually set a model into test mode, you need to manually place it back into train mode during training phase.</p><p>Possible values include:</p><ul><li><code>false</code> for training</li><li><code>true</code> for testing</li><li><code>:auto</code> or <code>nothing</code> for Flux to detect the mode automatically</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/functor.jl#L42-L55">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.trainmode!" href="#Flux.trainmode!"><code>Flux.trainmode!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">trainmode!(m, mode = true)</code></pre><p>Set a layer of model&#39;s train mode (see below). Symmetric to <a href="#Flux.testmode!"><code>testmode!</code></a> (i.e. 
`trainmode!(m, mode) == testmode!(m, !mode)).</p><p><em>Note</em>: if you manually set a model into train mode, you need to manually place it into test mode during testing phase.</p><p>Possible values include:</p><ul><li><code>true</code> for training</li><li><code>false</code> for testing</li><li><code>:auto</code> or <code>nothing</code> for Flux to detect the mode automatically</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/functor.jl#L58-L71">source</a></section></article><h2 id="Cost-Functions-1"><a class="docs-heading-anchor" href="#Cost-Functions-1">Cost Functions</a><a class="docs-heading-anchor-permalink" href="#Cost-Functions-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.mae" href="#Flux.mae"><code>Flux.mae</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">mae(ŷ, y)</code></pre><p>Return the mean of absolute error; calculated as <code>sum(abs.(ŷ .- y)) / length(y)</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L2-L7">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.mse" href="#Flux.mse"><code>Flux.mse</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">mse(ŷ, y)</code></pre><p>Return the mean squared error between ŷ and y; calculated as <code>sum((ŷ .- y).^2) / length(y)</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.mse([0, 2], [1, 1])
-1//1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L11-L22">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.msle" href="#Flux.msle"><code>Flux.msle</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">msle(ŷ, y; ϵ=eps(eltype(ŷ)))</code></pre><p>Return the mean of the squared logarithmic errors; calculated as <code>sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) / length(y)</code>. The <code>ϵ</code> term provides numerical stability.</p><p>Penalizes an under-predicted estimate greater than an over-predicted estimate.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L26-L34">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.huber_loss" href="#Flux.huber_loss"><code>Flux.huber_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">huber_loss(ŷ, y; δ=1.0)</code></pre><p>Return the mean of the <a href="https://en.wikipedia.org/wiki/Huber_loss">Huber loss</a> given the prediction <code>ŷ</code> and true values <code>y</code>.</p><pre><code class="language-none">             | 0.5 * |ŷ - y|,            for |ŷ - y| &lt;= δ
+          # 32 channels, 16 groups (G = 16), thus 2 channels per group used</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/normalise.jl#L313-L335">source</a></section></article><h3 id="Testmode-1"><a class="docs-heading-anchor" href="#Testmode-1">Testmode</a><a class="docs-heading-anchor-permalink" href="#Testmode-1" title="Permalink"></a></h3><p>Many normalisation layers behave differently under training and inference (testing). By default, Flux will automatically determine when a layer evaluation is part of training or inference. Still, depending on your use case, it may be helpful to manually specify when these layers should be treated as being trained or not. For this, Flux provides <code>Flux.testmode!</code>. When called on a model (e.g. a layer or chain of layers), this function will place the model into the mode specified.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.testmode!" href="#Flux.testmode!"><code>Flux.testmode!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">testmode!(m, mode = true)</code></pre><p>Set a layer or model&#39;s test mode (see below). Using <code>:auto</code> mode will treat any gradient computation as training.</p><p><em>Note</em>: if you manually set a model into test mode, you need to manually place it back into train mode during training phase.</p><p>Possible values include:</p><ul><li><code>false</code> for training</li><li><code>true</code> for testing</li><li><code>:auto</code> or <code>nothing</code> for Flux to detect the mode automatically</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/functor.jl#L7-L20">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.trainmode!" 
href="#Flux.trainmode!"><code>Flux.trainmode!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">trainmode!(m, mode = true)</code></pre><p>Set a layer or model&#39;s train mode (see below). Symmetric to <a href="#Flux.testmode!"><code>testmode!</code></a> (i.e. <code>trainmode!(m, mode) == testmode!(m, !mode)</code>).</p><p><em>Note</em>: if you manually set a model into train mode, you need to manually place it into test mode during testing phase.</p><p>Possible values include:</p><ul><li><code>true</code> for training</li><li><code>false</code> for testing</li><li><code>:auto</code> or <code>nothing</code> for Flux to detect the mode automatically</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/functor.jl#L23-L36">source</a></section></article><h2 id="Cost-Functions-1"><a class="docs-heading-anchor" href="#Cost-Functions-1">Cost Functions</a><a class="docs-heading-anchor-permalink" href="#Cost-Functions-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.mae" href="#Flux.mae"><code>Flux.mae</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">mae(ŷ, y)</code></pre><p>Return the mean of absolute error; calculated as <code>sum(abs.(ŷ .- y)) / length(y)</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L2-L7">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.mse" href="#Flux.mse"><code>Flux.mse</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">mse(ŷ, y)</code></pre><p>Return the mean squared error between ŷ and y; calculated as <code>sum((ŷ .- y).^2) / 
length(y)</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.mse([0, 2], [1, 1])
+1//1</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L11-L22">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.msle" href="#Flux.msle"><code>Flux.msle</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">msle(ŷ, y; ϵ=eps(eltype(ŷ)))</code></pre><p>Return the mean of the squared logarithmic errors; calculated as <code>sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) / length(y)</code>. The <code>ϵ</code> term provides numerical stability.</p><p>Penalizes an under-predicted estimate more than an over-predicted estimate.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L26-L34">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.huber_loss" href="#Flux.huber_loss"><code>Flux.huber_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">huber_loss(ŷ, y; δ=1.0)</code></pre><p>Return the mean of the <a href="https://en.wikipedia.org/wiki/Huber_loss">Huber loss</a> given the prediction <code>ŷ</code> and true values <code>y</code>.</p><pre><code class="language-none">             | 0.5 * |ŷ - y|^2,          for |ŷ - y| &lt;= δ
 Huber loss = |
-             |  δ * (|ŷ - y| - 0.5 * δ), otherwise</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L39-L48">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.crossentropy" href="#Flux.crossentropy"><code>Flux.crossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">crossentropy(ŷ, y; weight = nothing)</code></pre><p>Return the cross entropy between the given probability distributions; calculated as <code>-sum(y .* log.(ŷ) .* weight) / size(y, 2)</code>.</p><p><code>weight</code> can be <code>Nothing</code>, a <code>Number</code> or an <code>AbstractVector</code>. <code>weight=nothing</code> acts like <code>weight=1</code> but is faster.</p><p>See also: <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.crossentropy(softmax([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
-3.085467254747739</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L68-L84">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.logitcrossentropy" href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">logitcrossentropy(ŷ, y; weight = 1)</code></pre><p>Return the crossentropy computed after a <a href="../nnlib/#NNlib.logsoftmax"><code>Flux.logsoftmax</code></a> operation; calculated as <code>-sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2)</code>.</p><p><code>logitcrossentropy(ŷ, y)</code> is mathematically equivalent to <a href="#Flux.crossentropy"><code>Flux.crossentropy(softmax(log(ŷ)), y)</code></a> but it is more numerically stable.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.logitcrossentropy([-1.1491, 0.8619, 0.3127], [1, 1, 0])
-3.085467254747738</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L87-L103">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.binarycrossentropy" href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">binarycrossentropy(ŷ, y; ϵ=eps(ŷ))</code></pre><p>Return <span>$-y*\log(ŷ + ϵ) - (1-y)*\log(1-ŷ + ϵ)$</span>. The <code>ϵ</code> term provides numerical stability.</p><p>Typically, the prediction <code>ŷ</code> is given by the output of a <a href="../nnlib/#NNlib.sigmoid"><code>sigmoid</code></a> activation.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
+             |  δ * (|ŷ - y| - 0.5 * δ), otherwise</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L39-L48">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.crossentropy" href="#Flux.crossentropy"><code>Flux.crossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">crossentropy(ŷ, y; weight = nothing)</code></pre><p>Return the cross entropy between the given probability distributions; calculated as <code>-sum(y .* log.(ŷ) .* weight) / size(y, 2)</code>.</p><p><code>weight</code> can be <code>Nothing</code>, a <code>Number</code> or an <code>AbstractVector</code>. <code>weight=nothing</code> acts like <code>weight=1</code> but is faster.</p><p>See also: <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.crossentropy(softmax([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
+3.085467254747739</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L68-L84">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.logitcrossentropy" href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">logitcrossentropy(ŷ, y; weight = 1)</code></pre><p>Return the crossentropy computed after a <a href="../nnlib/#NNlib.logsoftmax"><code>Flux.logsoftmax</code></a> operation; calculated as <code>-sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2)</code>.</p><p><code>logitcrossentropy(ŷ, y)</code> is mathematically equivalent to <a href="#Flux.crossentropy"><code>Flux.crossentropy(softmax(ŷ), y)</code></a> but it is more numerically stable.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.logitcrossentropy([-1.1491, 0.8619, 0.3127], [1, 1, 0])
+3.085467254747738</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L87-L103">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.binarycrossentropy" href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">binarycrossentropy(ŷ, y; ϵ=eps(ŷ))</code></pre><p>Return <span>$-y*\log(ŷ + ϵ) - (1-y)*\log(1-ŷ + ϵ)$</span>. The <code>ϵ</code> term provides numerical stability.</p><p>Typically, the prediction <code>ŷ</code> is given by the output of a <a href="../nnlib/#NNlib.sigmoid"><code>sigmoid</code></a> activation.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
 3-element Array{Float64,1}:
  1.424397097347566
  0.35231664672364077
- 0.8616703662235441</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L108-L125">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.logitbinarycrossentropy" href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">logitbinarycrossentropy(ŷ, y)</code></pre><p><code>logitbinarycrossentropy(ŷ, y)</code> is mathematically equivalent to <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy(σ(log(ŷ)), y)</code></a> but it is more numerically stable.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
+ 0.8616703662235441</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L108-L125">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.logitbinarycrossentropy" href="#Flux.logitbinarycrossentropy"><code>Flux.logitbinarycrossentropy</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">logitbinarycrossentropy(ŷ, y)</code></pre><p><code>logitbinarycrossentropy(ŷ, y)</code> is mathematically equivalent to <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy(σ(ŷ), y)</code></a> but it is more numerically stable.</p><p>See also: <a href="#Flux.crossentropy"><code>Flux.crossentropy</code></a>, <a href="#Flux.logitcrossentropy"><code>Flux.logitcrossentropy</code></a>, <a href="#Flux.binarycrossentropy"><code>Flux.binarycrossentropy</code></a></p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
 3-element Array{Float64,1}:
  1.4243970973475661
  0.35231664672364094
- 0.8616703662235443</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L131-L147">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.kldivergence" href="#Flux.kldivergence"><code>Flux.kldivergence</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">kldivergence(ŷ, y)</code></pre><p>Return the <a href="https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence">Kullback-Leibler divergence</a> between the given probability distributions.</p><p>KL divergence is a measure of how much one probability distribution is different from the other. It is always non-negative and zero only when both the distributions are equal everywhere.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L185-L196">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.poisson" href="#Flux.poisson"><code>Flux.poisson</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">poisson(ŷ, y)</code></pre><p>Return how much the predicted distribution <code>ŷ</code> diverges from the expected Poisson distribution <code>y</code>; calculated as <code>sum(ŷ .- y .* log.(ŷ)) / size(y, 2)</code>.</p><p><a href="https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson">More information.</a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L203-L210">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.hinge" 
href="#Flux.hinge"><code>Flux.hinge</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">hinge(ŷ, y)</code></pre><p>Return the <a href="https://en.wikipedia.org/wiki/Hinge_loss">hinge loss</a> given the prediction <code>ŷ</code> and true labels <code>y</code> (containing 1 or -1); calculated as <code>sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)</code>.</p><p>See also: <a href="#Flux.squared_hinge"><code>squared_hinge</code></a></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L213-L221">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.squared_hinge" href="#Flux.squared_hinge"><code>Flux.squared_hinge</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">squared_hinge(ŷ, y)</code></pre><p>Return the squared hinge loss given the prediction <code>ŷ</code> and true labels <code>y</code> (containing 1 or -1); calculated as <code>sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)</code>.</p><p>See also: <a href="#Flux.hinge"><code>hinge</code></a></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L224-L231">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.dice_coeff_loss" href="#Flux.dice_coeff_loss"><code>Flux.dice_coeff_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">dice_coeff_loss(ŷ, y; smooth=1)</code></pre><p>Return a loss based on the dice coefficient. Used in the <a href="https://arxiv.org/pdf/1606.04797v1.pdf">V-Net</a> image segmentation architecture. Similar to the F1_score. 
Calculated as:     1 - 2<em>sum(|ŷ .</em> y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)`</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L234-L242">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.tversky_loss" href="#Flux.tversky_loss"><code>Flux.tversky_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">tversky_loss(ŷ, y; β=0.7)</code></pre><p>Return the <a href="https://arxiv.org/pdf/1706.05721.pdf">Tversky loss</a>. Used with imbalanced data to give more weight to false negatives. Larger β weigh recall higher than precision (by placing more emphasis on false negatives) Calculated as:     1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β<em>(1 .- y) .</em> ŷ + (1 - β)<em>y .</em> (1 .- ŷ)) + 1)</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/stateless.jl#L245-L253">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../regularisation/">« Regularisation</a><a class="docs-footer-nextpage" href="../advanced/">Advanced Model Building »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 
09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+ 0.8616703662235443</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L131-L147">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.kldivergence" href="#Flux.kldivergence"><code>Flux.kldivergence</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">kldivergence(ŷ, y)</code></pre><p>Return the <a href="https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence">Kullback-Leibler divergence</a> between the given probability distributions.</p><p>KL divergence is a measure of how much one probability distribution is different from the other. It is always non-negative and zero only when both the distributions are equal everywhere.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L185-L196">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.poisson" href="#Flux.poisson"><code>Flux.poisson</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">poisson(ŷ, y)</code></pre><p>Return how much the predicted distribution <code>ŷ</code> diverges from the expected Poisson distribution <code>y</code>; calculated as <code>sum(ŷ .- y .* log.(ŷ)) / size(y, 2)</code>.</p><p><a href="https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson">More information.</a>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L203-L210">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.hinge" 
href="#Flux.hinge"><code>Flux.hinge</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">hinge(ŷ, y)</code></pre><p>Return the <a href="https://en.wikipedia.org/wiki/Hinge_loss">hinge loss</a> given the prediction <code>ŷ</code> and true labels <code>y</code> (containing 1 or -1); calculated as <code>sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)</code>.</p><p>See also: <a href="#Flux.squared_hinge"><code>squared_hinge</code></a></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L213-L221">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.squared_hinge" href="#Flux.squared_hinge"><code>Flux.squared_hinge</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">squared_hinge(ŷ, y)</code></pre><p>Return the squared hinge loss given the prediction <code>ŷ</code> and true labels <code>y</code> (containing 1 or -1); calculated as <code>sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)</code>.</p><p>See also: <a href="#Flux.hinge"><code>hinge</code></a></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L224-L231">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.dice_coeff_loss" href="#Flux.dice_coeff_loss"><code>Flux.dice_coeff_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">dice_coeff_loss(ŷ, y; smooth=1)</code></pre><p>Return a loss based on the dice coefficient. Used in the <a href="https://arxiv.org/pdf/1606.04797v1.pdf">V-Net</a> image segmentation architecture. Similar to the F1_score. 
Calculated as: <code>1 - 2*sum(|ŷ .* y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)</code></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L234-L242">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.tversky_loss" href="#Flux.tversky_loss"><code>Flux.tversky_loss</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">tversky_loss(ŷ, y; β=0.7)</code></pre><p>Return the <a href="https://arxiv.org/pdf/1706.05721.pdf">Tversky loss</a>. Used with imbalanced data to give more weight to false negatives. Larger β weighs recall higher than precision (by placing more emphasis on false negatives). Calculated as: <code>1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)</code></p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/stateless.jl#L245-L253">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../regularisation/">« Regularisation</a><a class="docs-footer-nextpage" href="../advanced/">Advanced Model Building »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 
2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/nnlib/index.html b/dev/models/nnlib/index.html
index 7f3d0d9f..4186fec8 100644
--- a/dev/models/nnlib/index.html
+++ b/dev/models/nnlib/index.html
@@ -28,4 +28,4 @@ a = randomly sampled from uniform distribution U(l, u)</code></pre><p>Randomized
 batched_adjoint(A)</code></pre><p>Equivalent to applying <code>transpose</code> or <code>adjoint</code> to each matrix <code>A[:,:,k]</code>.</p><p>These exist to control how <code>batched_mul</code> behaves, as it operated on such matrix slices of an array with <code>ndims(A)==3</code>.</p><pre><code class="language-none">BatchedTranspose{T, N, S} &lt;: AbstractBatchedMatrix{T, N}
 BatchedAdjoint{T, N, S}</code></pre><p>Lazy wrappers analogous to <code>Transpose</code> and <code>Adjoint</code>, returned by <code>batched_transpose</code></p></div></section></article><article class="docstring"><header><a class="docstring-binding" id="NNlib.batched_transpose" href="#NNlib.batched_transpose"><code>NNlib.batched_transpose</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">batched_transpose(A::AbstractArray{T,3})
 batched_adjoint(A)</code></pre><p>Equivalent to applying <code>transpose</code> or <code>adjoint</code> to each matrix <code>A[:,:,k]</code>.</p><p>These exist to control how <code>batched_mul</code> behaves, as it operated on such matrix slices of an array with <code>ndims(A)==3</code>.</p><pre><code class="language-none">BatchedTranspose{T, N, S} &lt;: AbstractBatchedMatrix{T, N}
-BatchedAdjoint{T, N, S}</code></pre><p>Lazy wrappers analogous to <code>Transpose</code> and <code>Adjoint</code>, returned by <code>batched_transpose</code></p></div></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../advanced/">« Advanced Model Building</a><a class="docs-footer-nextpage" href="../../data/onehot/">One-Hot Encoding »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+BatchedAdjoint{T, N, S}</code></pre><p>Lazy wrappers analogous to <code>Transpose</code> and <code>Adjoint</code>, returned by <code>batched_transpose</code></p></div></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../advanced/">« Advanced Model Building</a><a class="docs-footer-nextpage" href="../../data/onehot/">One-Hot Encoding »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/recurrence/index.html b/dev/models/recurrence/index.html
index 4818a777..2f66d8b4 100644
--- a/dev/models/recurrence/index.html
+++ b/dev/models/recurrence/index.html
@@ -39,4 +39,4 @@ m = Flux.Recur(rnn, h)
 
 y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p><p>If you use the <code>RNN(10, 5)</code> constructor – as opposed to <code>RNNCell</code> – you&#39;ll see that it&#39;s simply a wrapped cell.</p><pre><code class="language-julia">julia&gt; RNN(10, 5)
 Recur(RNNCell(10, 5, tanh))</code></pre><h2 id="Sequences-1"><a class="docs-heading-anchor" href="#Sequences-1">Sequences</a><a class="docs-heading-anchor-permalink" href="#Sequences-1" title="Permalink"></a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">m.(seq) # returns a list of 5-element vectors</code></pre><p>This works even when we&#39;ve chain recurrent layers into a larger model.</p><pre><code class="language-julia">m = Chain(LSTM(10, 15), Dense(15, 5))
-m.(seq)</code></pre><p>Finally, we can reset the hidden state of the cell back to its initial value using <code>reset!(m)</code>.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../basics/">« Basics</a><a class="docs-footer-nextpage" href="../regularisation/">Regularisation »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+m.(seq)</code></pre><p>Finally, we can reset the hidden state of the cell back to its initial value using <code>reset!(m)</code>.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../basics/">« Basics</a><a class="docs-footer-nextpage" href="../regularisation/">Regularisation »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/models/regularisation/index.html b/dev/models/regularisation/index.html
index e4f1b335..810948ab 100644
--- a/dev/models/regularisation/index.html
+++ b/dev/models/regularisation/index.html
@@ -36,4 +36,4 @@ julia&gt; activations(c, rand(10))
  Float32[0.5192045, 0.48079553]                                  
 
 julia&gt; sum(norm, ans)
-2.1166067f0</code></pre><article class="docstring"><header><a class="docstring-binding" id="Flux.activations" href="#Flux.activations"><code>Flux.activations</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">activations(c::Chain, input)</code></pre><p>Calculate the forward results of each layers in Chain <code>c</code> with <code>input</code> as model input.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/layers/basic.jl#L67-L71">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../recurrence/">« Recurrence</a><a class="docs-footer-nextpage" href="../layers/">Model Reference »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+2.1166067f0</code></pre><article class="docstring"><header><a class="docstring-binding" id="Flux.activations" href="#Flux.activations"><code>Flux.activations</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">activations(c::Chain, input)</code></pre><p>Calculate the forward results of each layer in the Chain <code>c</code> with <code>input</code> as model input.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/layers/basic.jl#L67-L71">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../recurrence/">« Recurrence</a><a class="docs-footer-nextpage" href="../layers/">Model Reference »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/performance/index.html b/dev/performance/index.html
index 5b268162..850558ed 100644
--- a/dev/performance/index.html
+++ b/dev/performance/index.html
@@ -17,4 +17,4 @@ y_batch = reduce(hcat, ys)
 function loss_total(x_batch::Matrix, y_batch::Matrix)
     y_preds = model(x_batch)
     sum(loss.(y_preds, y_batch))
-end</code></pre><p>When doing this kind of concatenation use <code>reduce(hcat, xs)</code> rather than <code>hcat(xs...)</code>. This will avoid the splatting penalty, and will hit the optimised <code>reduce</code> method.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../utilities/">« Utility Functions</a><a class="docs-footer-nextpage" href="../datasets/">Datasets »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+end</code></pre><p>When doing this kind of concatenation use <code>reduce(hcat, xs)</code> rather than <code>hcat(xs...)</code>. This will avoid the splatting penalty, and will hit the optimised <code>reduce</code> method.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../utilities/">« Utility Functions</a><a class="docs-footer-nextpage" href="../datasets/">Datasets »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/saving/index.html b/dev/saving/index.html
index 34af1dde..28fa6233 100644
--- a/dev/saving/index.html
+++ b/dev/saving/index.html
@@ -47,4 +47,4 @@ evalcb = throttle(30) do
   # Show loss
   @save &quot;model-checkpoint.bson&quot; model
 end</code></pre><p>This will update the <code>&quot;model-checkpoint.bson&quot;</code> file every thirty seconds.</p><p>You can get more advanced by saving a series of models throughout training, for example</p><pre><code class="language-julia">@save &quot;model-$(now()).bson&quot; model</code></pre><p>will produce a series of models like <code>&quot;model-2018-03-06T02:57:10.41.bson&quot;</code>. You could also store the current test set loss, so that it&#39;s easy to (for example) revert to an older copy of the model if it starts to overfit.</p><pre><code class="language-julia">@save &quot;model-$(now()).bson&quot; model loss = testloss()</code></pre><p>You can even store optimiser state alongside the model, to resume training exactly where you left off.</p><pre><code class="language-julia">opt = ADAM()
-@save &quot;model-$(now()).bson&quot; model opt</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../gpu/">« GPU Support</a><a class="docs-footer-nextpage" href="../ecosystem/">The Julia Ecosystem »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+@save &quot;model-$(now()).bson&quot; model opt</code></pre></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../gpu/">« GPU Support</a><a class="docs-footer-nextpage" href="../ecosystem/">The Julia Ecosystem »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/search/index.html b/dev/search/index.html
index bfb7390d..8728f90d 100644
--- a/dev/search/index.html
+++ b/dev/search/index.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview', {'page': location.pathname + location.search + location.hash});
-</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li><a class="tocitem" href="../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Search</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Search</a></li></ul></nav><div class="docs-right"><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article><p id="documenter-search-info">Loading search...</p><ul id="documenter-search-results"></ul></article></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div 
class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body><script src="../search_index.js"></script><script src="../assets/search.js"></script></html>
+</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../models/basics/">Basics</a></li><li><a class="tocitem" href="../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../models/layers/">Model Reference</a></li><li><a class="tocitem" 
href="../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../training/optimisers/">Optimisers</a></li><li><a class="tocitem" href="../training/training/">Training</a></li></ul></li><li><a class="tocitem" href="../gpu/">GPU Support</a></li><li><a class="tocitem" href="../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../performance/">Performance Tips</a></li><li><a class="tocitem" href="../datasets/">Datasets</a></li><li><a class="tocitem" href="../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li class="is-active"><a href>Search</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Search</a></li></ul></nav><div class="docs-right"><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article><p id="documenter-search-info">Loading search...</p><ul id="documenter-search-results"></ul></article></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div 
class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body><script src="../search_index.js"></script><script src="../assets/search.js"></script></html>
diff --git a/dev/training/optimisers/index.html b/dev/training/optimisers/index.html
index feefed1d..46e7b911 100644
--- a/dev/training/optimisers/index.html
+++ b/dev/training/optimisers/index.html
@@ -27,8 +27,8 @@ end</code></pre><p>Running this will alter the parameters <code>W</code> and <co
 
 for p in (W, b)
   update!(opt, p, grads[p])
-end</code></pre><p>An optimiser <code>update!</code> accepts a parameter and a gradient, and updates the parameter according to the chosen rule. We can also pass <code>opt</code> to our <a href="../training/">training loop</a>, which will update all parameters of the model in a loop. However, we can now easily replace <code>Descent</code> with a more advanced optimiser such as <code>ADAM</code>.</p><h2 id="Optimiser-Reference-1"><a class="docs-heading-anchor" href="#Optimiser-Reference-1">Optimiser Reference</a><a class="docs-heading-anchor-permalink" href="#Optimiser-Reference-1" title="Permalink"></a></h2><p>All optimisers return an object that, when passed to <code>train!</code>, will update the parameters passed to it.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.update!" href="#Flux.Optimise.update!"><code>Flux.Optimise.update!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">update!(x, x̄)</code></pre><p>Update the array <code>x</code> according to <code>x .-= x̄</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/train.jl#L6-L10">source</a></section><section><div><pre><code class="language-none">update!(opt, p, g)
-update!(opt, ps::Params, gs)</code></pre><p>Perform an update step of the parameters <code>ps</code> (or the single parameter <code>p</code>) according to optimizer <code>opt</code>  and the gradients <code>gs</code> (the gradient <code>g</code>).</p><p>As a result, the parameters are mutated and the optimizer&#39;s internal state may change.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/train.jl#L15-L23">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Descent" href="#Flux.Optimise.Descent"><code>Flux.Optimise.Descent</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Descent(η = 0.1)</code></pre><p>Classic gradient descent optimiser with learning rate <code>η</code>. For each parameter <code>p</code> and its gradient <code>δp</code>, this runs <code>p -= η*δp</code></p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Descent()
+end</code></pre><p>An optimiser <code>update!</code> accepts a parameter and a gradient, and updates the parameter according to the chosen rule. We can also pass <code>opt</code> to our <a href="../training/">training loop</a>, which will update all parameters of the model in a loop. However, we can now easily replace <code>Descent</code> with a more advanced optimiser such as <code>ADAM</code>.</p><h2 id="Optimiser-Reference-1"><a class="docs-heading-anchor" href="#Optimiser-Reference-1">Optimiser Reference</a><a class="docs-heading-anchor-permalink" href="#Optimiser-Reference-1" title="Permalink"></a></h2><p>All optimisers return an object that, when passed to <code>train!</code>, will update the parameters passed to it.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.update!" href="#Flux.Optimise.update!"><code>Flux.Optimise.update!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">update!(x, x̄)</code></pre><p>Update the array <code>x</code> according to <code>x .-= x̄</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/train.jl#L6-L10">source</a></section><section><div><pre><code class="language-none">update!(opt, p, g)
+update!(opt, ps::Params, gs)</code></pre><p>Perform an update step of the parameters <code>ps</code> (or the single parameter <code>p</code>) according to optimizer <code>opt</code>  and the gradients <code>gs</code> (the gradient <code>g</code>).</p><p>As a result, the parameters are mutated and the optimizer&#39;s internal state may change.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/train.jl#L15-L23">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Descent" href="#Flux.Optimise.Descent"><code>Flux.Optimise.Descent</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Descent(η = 0.1)</code></pre><p>Classic gradient descent optimiser with learning rate <code>η</code>. For each parameter <code>p</code> and its gradient <code>δp</code>, this runs <code>p -= η*δp</code></p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Descent()
 
 opt = Descent(0.3)
 
@@ -38,29 +38,29 @@ gs = gradient(ps) do
     loss(x, y)
 end
 
-Flux.Optimise.update!(opt, ps, gs)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L8-L32">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Momentum" href="#Flux.Optimise.Momentum"><code>Flux.Optimise.Momentum</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Momentum(η = 0.01, ρ = 0.9)</code></pre><p>Gradient descent optimizer with learning rate <code>η</code> and momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                 prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Momentum()
+Flux.Optimise.update!(opt, ps, gs)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L8-L32">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Momentum" href="#Flux.Optimise.Momentum"><code>Flux.Optimise.Momentum</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Momentum(η = 0.01, ρ = 0.9)</code></pre><p>Gradient descent optimizer with learning rate <code>η</code> and momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                 prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Momentum()
 
-opt = Momentum(0.01, 0.99)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L43-L60">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Nesterov" href="#Flux.Optimise.Nesterov"><code>Flux.Optimise.Nesterov</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Nesterov(η = 0.001, ρ = 0.9)</code></pre><p>Gradient descent optimizer with learning rate <code>η</code> and Nesterov momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Nesterov momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                          prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Nesterov()
+opt = Momentum(0.01, 0.99)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L43-L60">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.Nesterov" href="#Flux.Optimise.Nesterov"><code>Flux.Optimise.Nesterov</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">Nesterov(η = 0.001, ρ = 0.9)</code></pre><p>Gradient descent optimizer with learning rate <code>η</code> and Nesterov momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Nesterov momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                          prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Nesterov()
 
-opt = Nesterov(0.003, 0.95)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L76-L93">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.RMSProp" href="#Flux.Optimise.RMSProp"><code>Flux.Optimise.RMSProp</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">RMSProp(η = 0.001, ρ = 0.9)</code></pre><p>Optimizer using the <a href="https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">RMSProp</a> algorithm. Often a good choice for recurrent networks. Parameters other than learning rate generally don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                 prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = RMSProp()
+opt = Nesterov(0.003, 0.95)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L76-L93">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.RMSProp" href="#Flux.Optimise.RMSProp"><code>Flux.Optimise.RMSProp</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">RMSProp(η = 0.001, ρ = 0.9)</code></pre><p>Optimizer using the <a href="https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">RMSProp</a> algorithm. Often a good choice for recurrent networks. Parameters other than learning rate generally don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Momentum (<code>ρ</code>): Controls the acceleration of gradient descent in the                 prominent direction, in effect dampening oscillations.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = RMSProp()
 
-opt = RMSProp(0.002, 0.95)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L110-L130">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAM" href="#Flux.Optimise.ADAM"><code>Flux.Optimise.ADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/abs/1412.6980v8">ADAM</a> optimiser.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAM()
+opt = RMSProp(0.002, 0.95)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L110-L130">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAM" href="#Flux.Optimise.ADAM"><code>Flux.Optimise.ADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/abs/1412.6980v8">ADAM</a> optimiser.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAM()
 
-opt = ADAM(0.001, (0.9, 0.8))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L146-L163">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.RADAM" href="#Flux.Optimise.RADAM"><code>Flux.Optimise.RADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">RADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/pdf/1908.03265v1.pdf">Rectified ADAM</a> optimizer.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = RADAM()
+opt = ADAM(0.001, (0.9, 0.8))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L146-L163">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.RADAM" href="#Flux.Optimise.RADAM"><code>Flux.Optimise.RADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">RADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/pdf/1908.03265v1.pdf">Rectified ADAM</a> optimizer.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = RADAM()
 
-opt = RADAM(0.001, (0.9, 0.8))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L182-L199">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.AdaMax" href="#Flux.Optimise.AdaMax"><code>Flux.Optimise.AdaMax</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AdaMax(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/abs/1412.6980v9">AdaMax</a> is a variant of ADAM based on the ∞-norm.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AdaMax()
+opt = RADAM(0.001, (0.9, 0.8))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L182-L199">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.AdaMax" href="#Flux.Optimise.AdaMax"><code>Flux.Optimise.AdaMax</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AdaMax(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="https://arxiv.org/abs/1412.6980v9">AdaMax</a> is a variant of ADAM based on the ∞-norm.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AdaMax()
 
-opt = AdaMax(0.001, (0.9, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L225-L242">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAGrad" href="#Flux.Optimise.ADAGrad"><code>Flux.Optimise.ADAGrad</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADAGrad(η = 0.1)</code></pre><p><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">ADAGrad</a> optimizer. It has parameter specific learning rates based on how frequently it is updated. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAGrad()
+opt = AdaMax(0.001, (0.9, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L225-L242">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAGrad" href="#Flux.Optimise.ADAGrad"><code>Flux.Optimise.ADAGrad</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADAGrad(η = 0.1)</code></pre><p><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">ADAGrad</a> optimizer. It has parameter specific learning rates based on how frequently it is updated. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAGrad()
 
-opt = ADAGrad(0.001)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L261-L278">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADADelta" href="#Flux.Optimise.ADADelta"><code>Flux.Optimise.ADADelta</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADADelta(ρ = 0.9)</code></pre><p><a href="https://arxiv.org/abs/1212.5701">ADADelta</a> is a version of ADAGrad adapting its learning rate based on a window of past gradient updates. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Rho (<code>ρ</code>): Factor by which the gradient is decayed at each time step.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADADelta()
+opt = ADAGrad(0.001)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L261-L278">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADADelta" href="#Flux.Optimise.ADADelta"><code>Flux.Optimise.ADADelta</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ADADelta(ρ = 0.9)</code></pre><p><a href="https://arxiv.org/abs/1212.5701">ADADelta</a> is a version of ADAGrad adapting its learning rate based on a window of past gradient updates. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Rho (<code>ρ</code>): Factor by which the gradient is decayed at each time step.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADADelta()
 
-opt = ADADelta(0.89)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L293-L309">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.AMSGrad" href="#Flux.Optimise.AMSGrad"><code>Flux.Optimise.AMSGrad</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AMSGrad(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p>The <a href="https://openreview.net/forum?id=ryQu7f-RZ">AMSGrad</a> version of the ADAM optimiser. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AMSGrad()
+opt = ADADelta(0.89)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L293-L309">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.AMSGrad" href="#Flux.Optimise.AMSGrad"><code>Flux.Optimise.AMSGrad</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">AMSGrad(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p>The <a href="https://openreview.net/forum?id=ryQu7f-RZ">AMSGrad</a> version of the ADAM optimiser. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AMSGrad()
 
-opt = AMSGrad(0.001, (0.89, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L326-L344">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.NADAM" href="#Flux.Optimise.NADAM"><code>Flux.Optimise.NADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">NADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="http://cs229.stanford.edu/proj2015/054_report.pdf">NADAM</a> is a Nesterov variant of ADAM. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = NADAM()
+opt = AMSGrad(0.001, (0.89, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L326-L344">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.NADAM" href="#Flux.Optimise.NADAM"><code>Flux.Optimise.NADAM</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">NADAM(η = 0.001, β::Tuple = (0.9, 0.999))</code></pre><p><a href="http://cs229.stanford.edu/proj2015/054_report.pdf">NADAM</a> is a Nesterov variant of ADAM. Parameters don&#39;t need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = NADAM()
 
-opt = NADAM(0.002, (0.89, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L362-L380">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAMW" href="#Flux.Optimise.ADAMW"><code>Flux.Optimise.ADAMW</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">ADAMW(η = 0.001, β::Tuple = (0.9, 0.999), decay = 0)</code></pre><p><a href="https://arxiv.org/abs/1711.05101">ADAMW</a> is a variant of ADAM fixing (as in repairing) its weight decay regularization.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li><li><code>decay</code>: Decay applied to weights during optimisation.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAMW()
+opt = NADAM(0.002, (0.89, 0.995))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L362-L380">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ADAMW" href="#Flux.Optimise.ADAMW"><code>Flux.Optimise.ADAMW</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">ADAMW(η = 0.001, β::Tuple = (0.9, 0.999), decay = 0)</code></pre><p><a href="https://arxiv.org/abs/1711.05101">ADAMW</a> is a variant of ADAM fixing (as in repairing) its weight decay regularization.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li>Decay of momentums (<code>β::Tuple</code>): Exponential decay for the first (β1) and the                                  second (β2) momentum estimate.</li><li><code>decay</code>: Decay applied to weights during optimisation.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAMW()
 
-opt = ADAMW(0.001, (0.89, 0.995), 0.1)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L399-L418">source</a></section></article><h2 id="Optimiser-Interface-1"><a class="docs-heading-anchor" href="#Optimiser-Interface-1">Optimiser Interface</a><a class="docs-heading-anchor-permalink" href="#Optimiser-Interface-1" title="Permalink"></a></h2><p>Flux&#39;s optimisers are built around a <code>struct</code> that holds all the optimiser parameters along with a definition of how to apply the update rule associated with it. We do this via the <code>apply!</code> function which takes the optimiser as the first argument followed by the parameter and its corresponding gradient.</p><p>In this manner Flux also allows one to create custom optimisers to be used seamlessly. Let&#39;s work this with a simple example.</p><pre><code class="language-julia">mutable struct Momentum
+opt = ADAMW(0.001, (0.89, 0.995), 0.1)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L399-L418">source</a></section></article><h2 id="Optimiser-Interface-1"><a class="docs-heading-anchor" href="#Optimiser-Interface-1">Optimiser Interface</a><a class="docs-heading-anchor-permalink" href="#Optimiser-Interface-1" title="Permalink"></a></h2><p>Flux&#39;s optimisers are built around a <code>struct</code> that holds all the optimiser parameters along with a definition of how to apply the update rule associated with it. We do this via the <code>apply!</code> function which takes the optimiser as the first argument followed by the parameter and its corresponding gradient.</p><p>In this manner Flux also allows one to create custom optimisers to be used seamlessly. Let&#39;s work this with a simple example.</p><pre><code class="language-julia">mutable struct Momentum
   eta
   rho
   velocity
@@ -88,4 +88,4 @@ end
 
 loss(rand(10)) # around 0.9</code></pre><p>In this manner it is possible to compose optimisers for some added flexibility.</p><h2 id="Decays-1"><a class="docs-heading-anchor" href="#Decays-1">Decays</a><a class="docs-heading-anchor-permalink" href="#Decays-1" title="Permalink"></a></h2><p>Similar to optimisers, Flux also defines some simple decays that can be used in conjunction with other optimisers, or standalone.</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.ExpDecay" href="#Flux.Optimise.ExpDecay"><code>Flux.Optimise.ExpDecay</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">ExpDecay(η = 0.001, decay = 0.1, decay_step = 1000, clip = 1e-4)</code></pre><p>Discount the learning rate <code>η</code> by the factor <code>decay</code> every <code>decay_step</code> steps till a minimum of <code>clip</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning rate (<code>η</code>): Amount by which gradients are discounted before updating                      the weights.</li><li><code>decay</code>: Factor by which the learning rate is discounted.</li><li><code>decay_step</code>: Schedule decay operations by setting the number of steps between               two decay operations.</li><li><code>clip</code>: Minimum value of learning rate.</li></ul><p><strong>Examples</strong></p><p>To apply exponential decay to an optimiser:</p><pre><code class="language-julia">Optimiser(ExpDecay(..), Opt(..))
 
-opt = Optimiser(ExpDecay(), ADAM())</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L476-L497">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.InvDecay" href="#Flux.Optimise.InvDecay"><code>Flux.Optimise.InvDecay</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">InvDecay(γ = 0.001)</code></pre><p>Apply inverse time decay to an optimiser, so that the effective step size at iteration <code>n</code> is <code>eta / (1 + γ * n)</code> where <code>eta</code> is the initial step size. The wrapped optimiser&#39;s step size is not modified.</p><p><strong>Examples</strong></p><pre><code class="language-julia">Optimiser(InvDecay(..), Opt(..))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L449-L460">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.WeightDecay" href="#Flux.Optimise.WeightDecay"><code>Flux.Optimise.WeightDecay</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">WeightDecay(wd = 0)</code></pre><p>Decay weights by <code>wd</code>.</p><p><strong>Parameters</strong></p><ul><li>Weight decay (<code>wd</code>)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/optimisers.jl#L518-L525">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../data/dataloader/">« DataLoader</a><a class="docs-footer-nextpage" href="../training/">Training »</a></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+opt = Optimiser(ExpDecay(), ADAM())</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L476-L497">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.InvDecay" href="#Flux.Optimise.InvDecay"><code>Flux.Optimise.InvDecay</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">InvDecay(γ = 0.001)</code></pre><p>Apply inverse time decay to an optimiser, so that the effective step size at iteration <code>n</code> is <code>eta / (1 + γ * n)</code> where <code>eta</code> is the initial step size. The wrapped optimiser&#39;s step size is not modified.</p><p><strong>Examples</strong></p><pre><code class="language-julia">Optimiser(InvDecay(..), Opt(..))</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L449-L460">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.WeightDecay" href="#Flux.Optimise.WeightDecay"><code>Flux.Optimise.WeightDecay</code></a> — <span class="docstring-category">Type</span></header><section><div><pre><code class="language-julia">WeightDecay(wd = 0)</code></pre><p>Decay weights by <code>wd</code>.</p><p><strong>Parameters</strong></p><ul><li>Weight decay (<code>wd</code>)</li></ul></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/optimisers.jl#L518-L525">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../../data/dataloader/">« DataLoader</a><a class="docs-footer-nextpage" href="../training/">Training »</a></nav></div><div class="modal" id="documenter-settings"><div 
class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/training/training/index.html b/dev/training/training/index.html
index 70e014df..f69d4bb2 100644
--- a/dev/training/training/index.html
+++ b/dev/training/training/index.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview', {'page': location.pathname + location.search + location.hash});
-</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../../models/basics/">Basics</a></li><li><a class="tocitem" href="../../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../../models/layers/">Model 
Reference</a></li><li><a class="tocitem" href="../../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../optimisers/">Optimisers</a></li><li class="is-active"><a class="tocitem" href>Training</a><ul class="internal"><li><a class="tocitem" href="#Loss-Functions-1"><span>Loss Functions</span></a></li><li><a class="tocitem" href="#Model-parameters-1"><span>Model parameters</span></a></li><li><a class="tocitem" href="#Datasets-1"><span>Datasets</span></a></li><li><a class="tocitem" href="#Callbacks-1"><span>Callbacks</span></a></li><li><a class="tocitem" href="#Custom-Training-loops-1"><span>Custom Training loops</span></a></li></ul></li></ul></li><li><a class="tocitem" href="../../gpu/">GPU Support</a></li><li><a class="tocitem" href="../../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><a class="tocitem" href="../../datasets/">Datasets</a></li><li><a class="tocitem" href="../../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Training Models</a></li><li class="is-active"><a 
href>Training</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Training</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Training-1"><a class="docs-heading-anchor" href="#Training-1">Training</a><a class="docs-heading-anchor-permalink" href="#Training-1" title="Permalink"></a></h1><p>To actually train a model we need four things:</p><ul><li>A <em>objective function</em>, that evaluates how well a model is doing given some input data.</li><li>The trainable parameters of the model.</li><li>A collection of data points that will be provided to the objective function.</li><li>An <a href="../optimisers/">optimiser</a> that will update the model parameters appropriately.</li></ul><p>With these we can call <code>train!</code>:</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.train!" href="#Flux.Optimise.train!"><code>Flux.Optimise.train!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">train!(loss, params, data, opt; cb)</code></pre><p>For each datapoint <code>d</code> in <code>data</code> compute the gradient of <code>loss(d...)</code> through backpropagation and call the optimizer <code>opt</code>.</p><p>In case datapoints <code>d</code> are of numeric array type, assume no splatting is needed and compute the gradient of <code>loss(d)</code>.</p><p>A callback is given with the keyword argument <code>cb</code>. 
For example, this will print &quot;training&quot; every 10 seconds (using <a href="../../utilities/#Flux.throttle"><code>Flux.throttle</code></a>):</p><p>train!(loss, params, data, opt,          cb = throttle(() -&gt; println(&quot;training&quot;), 10))</p><p>The callback can call <a href="../../utilities/#Flux.Optimise.stop"><code>Flux.stop</code></a> to interrupt the training loop.</p><p>Multiple optimisers and callbacks can be passed to <code>opt</code> and <code>cb</code> as arrays.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/train.jl#L59-L77">source</a></section></article><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2 id="Loss-Functions-1"><a class="docs-heading-anchor" href="#Loss-Functions-1">Loss Functions</a><a class="docs-heading-anchor-permalink" href="#Loss-Functions-1" title="Permalink"></a></h2><p>The objective function must return a number representing how far the model is from its target – the <em>loss</em> of the model. The <code>loss</code> function that we defined in <a href="../../models/basics/">basics</a> will work as an objective. We can also define an objective in terms of some model:</p><pre><code class="language-julia">m = Chain(
+</script><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/fontawesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/solid.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.11.2/css/brands.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/katex.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-dark.css" data-theme-name="documenter-dark"/><link class="docs-theme-link" rel="stylesheet" type="text/css" href="../../assets/themes/documenter-light.css" data-theme-name="documenter-light" data-theme-primary/><script src="../../assets/themeswap.js"></script></head><body><div id="documenter"><nav class="docs-sidebar"><div class="docs-package-name"><span class="docs-autofit">Flux</span></div><form class="docs-search" action="../../search/"><input class="docs-search-query" id="documenter-search-query" name="q" type="text" placeholder="Search docs"/></form><ul class="docs-menu"><li><a class="tocitem" href="../../">Home</a></li><li><span class="tocitem">Building Models</span><ul><li><a class="tocitem" href="../../models/basics/">Basics</a></li><li><a class="tocitem" href="../../models/recurrence/">Recurrence</a></li><li><a class="tocitem" href="../../models/regularisation/">Regularisation</a></li><li><a class="tocitem" href="../../models/layers/">Model 
Reference</a></li><li><a class="tocitem" href="../../models/advanced/">Advanced Model Building</a></li><li><a class="tocitem" href="../../models/nnlib/">NNlib</a></li></ul></li><li><span class="tocitem">Handling Data</span><ul><li><a class="tocitem" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="tocitem" href="../../data/dataloader/">DataLoader</a></li></ul></li><li><span class="tocitem">Training Models</span><ul><li><a class="tocitem" href="../optimisers/">Optimisers</a></li><li class="is-active"><a class="tocitem" href>Training</a><ul class="internal"><li><a class="tocitem" href="#Loss-Functions-1"><span>Loss Functions</span></a></li><li><a class="tocitem" href="#Model-parameters-1"><span>Model parameters</span></a></li><li><a class="tocitem" href="#Datasets-1"><span>Datasets</span></a></li><li><a class="tocitem" href="#Callbacks-1"><span>Callbacks</span></a></li><li><a class="tocitem" href="#Custom-Training-loops-1"><span>Custom Training loops</span></a></li></ul></li></ul></li><li><a class="tocitem" href="../../gpu/">GPU Support</a></li><li><a class="tocitem" href="../../saving/">Saving &amp; Loading</a></li><li><a class="tocitem" href="../../ecosystem/">The Julia Ecosystem</a></li><li><a class="tocitem" href="../../utilities/">Utility Functions</a></li><li><a class="tocitem" href="../../performance/">Performance Tips</a></li><li><a class="tocitem" href="../../datasets/">Datasets</a></li><li><a class="tocitem" href="../../community/">Community</a></li></ul><div class="docs-version-selector field has-addons"><div class="control"><span class="docs-label button is-static is-size-7">Version</span></div><div class="docs-selector control is-expanded"><div class="select is-fullwidth is-size-7"><select id="documenter-version-selector"></select></div></div></div></nav><div class="docs-main"><header class="docs-navbar"><nav class="breadcrumb"><ul class="is-hidden-mobile"><li><a class="is-disabled">Training Models</a></li><li class="is-active"><a 
href>Training</a></li></ul><ul class="is-hidden-tablet"><li class="is-active"><a href>Training</a></li></ul></nav><div class="docs-right"><a class="docs-edit-link" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md" title="Edit on GitHub"><span class="docs-icon fab"></span><span class="docs-label is-hidden-touch">Edit on GitHub</span></a><a class="docs-settings-button fas fa-cog" id="documenter-settings-button" href="#" title="Settings"></a><a class="docs-sidebar-button fa fa-bars is-hidden-desktop" id="documenter-sidebar-button" href="#"></a></div></header><article class="content" id="documenter-page"><h1 id="Training-1"><a class="docs-heading-anchor" href="#Training-1">Training</a><a class="docs-heading-anchor-permalink" href="#Training-1" title="Permalink"></a></h1><p>To actually train a model we need four things:</p><ul><li>A <em>objective function</em>, that evaluates how well a model is doing given some input data.</li><li>The trainable parameters of the model.</li><li>A collection of data points that will be provided to the objective function.</li><li>An <a href="../optimisers/">optimiser</a> that will update the model parameters appropriately.</li></ul><p>With these we can call <code>train!</code>:</p><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.train!" href="#Flux.Optimise.train!"><code>Flux.Optimise.train!</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">train!(loss, params, data, opt; cb)</code></pre><p>For each datapoint <code>d</code> in <code>data</code> compute the gradient of <code>loss(d...)</code> through backpropagation and call the optimizer <code>opt</code>.</p><p>In case datapoints <code>d</code> are of numeric array type, assume no splatting is needed and compute the gradient of <code>loss(d)</code>.</p><p>A callback is given with the keyword argument <code>cb</code>. 
For example, this will print &quot;training&quot; every 10 seconds (using <a href="../../utilities/#Flux.throttle"><code>Flux.throttle</code></a>):</p><p>train!(loss, params, data, opt,          cb = throttle(() -&gt; println(&quot;training&quot;), 10))</p><p>The callback can call <a href="../../utilities/#Flux.Optimise.stop"><code>Flux.stop</code></a> to interrupt the training loop.</p><p>Multiple optimisers and callbacks can be passed to <code>opt</code> and <code>cb</code> as arrays.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/train.jl#L59-L77">source</a></section></article><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2 id="Loss-Functions-1"><a class="docs-heading-anchor" href="#Loss-Functions-1">Loss Functions</a><a class="docs-heading-anchor-permalink" href="#Loss-Functions-1" title="Permalink"></a></h2><p>The objective function must return a number representing how far the model is from its target – the <em>loss</em> of the model. The <code>loss</code> function that we defined in <a href="../../models/basics/">basics</a> will work as an objective. We can also define an objective in terms of some model:</p><pre><code class="language-julia">m = Chain(
   Dense(784, 32, σ),
   Dense(32, 10), softmax)
 
@@ -36,7 +36,7 @@ julia&gt; @epochs 2 Flux.train!(...)
 [ Info: Epoch 1
 hello
 [ Info: Epoch 2
-hello</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/train.jl#L104-L118">source</a></section></article><h2 id="Callbacks-1"><a class="docs-heading-anchor" href="#Callbacks-1">Callbacks</a><a class="docs-heading-anchor-permalink" href="#Callbacks-1" title="Permalink"></a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that&#39;s used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(objective, ps, data, opt, cb = () -&gt; println(&quot;training&quot;))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
+hello</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/train.jl#L104-L118">source</a></section></article><h2 id="Callbacks-1"><a class="docs-heading-anchor" href="#Callbacks-1">Callbacks</a><a class="docs-heading-anchor-permalink" href="#Callbacks-1" title="Permalink"></a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that&#39;s used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(objective, ps, data, opt, cb = () -&gt; println(&quot;training&quot;))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
 evalcb() = @show(loss(test_x, test_y))
 
 Flux.train!(objective, ps, data, opt,
@@ -55,4 +55,4 @@ end</code></pre><h2 id="Custom-Training-loops-1"><a class="docs-heading-anchor"
     update!(opt, ps, gs)
     # Here you might like to check validation set accuracy, and break out to do early stopping
   end
-end</code></pre><p>You could simplify this further, for example by hard-coding in the loss function.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../optimisers/">« Optimisers</a><a class="docs-footer-nextpage" href="../../gpu/">GPU Support »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+end</code></pre><p>You could simplify this further, for example by hard-coding in the loss function.</p></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../optimisers/">« Optimisers</a><a class="docs-footer-nextpage" href="../../gpu/">GPU Support »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
diff --git a/dev/utilities/index.html b/dev/utilities/index.html
index 63a91dfb..cc323673 100644
--- a/dev/utilities/index.html
+++ b/dev/utilities/index.html
@@ -24,7 +24,7 @@ julia&gt; Flux.unsqueeze([1 2; 3 4], 2)
 
 [:, :, 2] =
  2
- 4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L46-L74">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.stack" href="#Flux.stack"><code>Flux.stack</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">stack(xs, dim)</code></pre><p>Concatenate the given <code>Array</code> of <code>Array</code>s <code>xs</code> into a single <code>Array</code> along the given dimension <code>dim</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; xs = [[1, 2], [3, 4], [5, 6]]
+ 4</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L46-L74">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.stack" href="#Flux.stack"><code>Flux.stack</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">stack(xs, dim)</code></pre><p>Concatenate the given <code>Array</code> of <code>Array</code>s <code>xs</code> into a single <code>Array</code> along the given dimension <code>dim</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; xs = [[1, 2], [3, 4], [5, 6]]
 3-element Array{Array{Int64,1},1}:
  [1, 2]
  [3, 4]
@@ -40,12 +40,12 @@ julia&gt; cat(xs, dims=1)
 3-element Array{Array{Int64,1},1}:
  [1, 2]
  [3, 4]
- [5, 6]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L77-L103">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.unstack" href="#Flux.unstack"><code>Flux.unstack</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">unstack(xs, dim)</code></pre><p>Unroll the given <code>xs</code> into an <code>Array</code> of <code>Array</code>s along the given dimension <code>dim</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.unstack([1 3 5 7; 2 4 6 8], 2)
+ [5, 6]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L77-L103">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.unstack" href="#Flux.unstack"><code>Flux.unstack</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">unstack(xs, dim)</code></pre><p>Unroll the given <code>xs</code> into an <code>Array</code> of <code>Array</code>s along the given dimension <code>dim</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.unstack([1 3 5 7; 2 4 6 8], 2)
 4-element Array{Array{Int64,1},1}:
  [1, 2]
  [3, 4]
  [5, 6]
- [7, 8]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L106-L120">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.chunk" href="#Flux.chunk"><code>Flux.chunk</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">chunk(xs, n)</code></pre><p>Split <code>xs</code> into <code>n</code> parts.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.chunk(1:10, 3)
+ [7, 8]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L106-L120">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.chunk" href="#Flux.chunk"><code>Flux.chunk</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">chunk(xs, n)</code></pre><p>Split <code>xs</code> into <code>n</code> parts.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.chunk(1:10, 3)
 3-element Array{UnitRange{Int64},1}:
  1:4
  5:8
@@ -55,18 +55,18 @@ julia&gt; Flux.chunk(collect(1:10), 3)
 3-element Array{SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true},1}:
  [1, 2, 3, 4]
  [5, 6, 7, 8]
- [9, 10]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L123-L142">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.frequencies" href="#Flux.frequencies"><code>Flux.frequencies</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">frequencies(xs)</code></pre><p>Count the number of times that each element of <code>xs</code> appears.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.frequencies([&#39;a&#39;,&#39;b&#39;,&#39;b&#39;])
+ [9, 10]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L123-L142">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.frequencies" href="#Flux.frequencies"><code>Flux.frequencies</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">frequencies(xs)</code></pre><p>Count the number of times that each element of <code>xs</code> appears.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.frequencies([&#39;a&#39;,&#39;b&#39;,&#39;b&#39;])
 Dict{Char,Int64} with 2 entries:
   &#39;a&#39; =&gt; 1
-  &#39;b&#39; =&gt; 2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L147-L159">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.batch" href="#Flux.batch"><code>Flux.batch</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">batch(xs)</code></pre><p>Batch the arrays in <code>xs</code> into a single array.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.batch([[1,2,3],[4,5,6]])
+  &#39;b&#39; =&gt; 2</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L147-L159">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.batch" href="#Flux.batch"><code>Flux.batch</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">batch(xs)</code></pre><p>Batch the arrays in <code>xs</code> into a single array.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.batch([[1,2,3],[4,5,6]])
 3×2 Array{Int64,2}:
  1  4
  2  5
- 3  6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L172-L185">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.batchseq" href="#Flux.batchseq"><code>Flux.batchseq</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">batchseq(seqs, pad)</code></pre><p>Take a list of <code>N</code> sequences, and turn them into a single sequence where each item is a batch of <code>N</code>. Short sequences will be padded by <code>pad</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.batchseq([[1, 2, 3], [4, 5]], 0)
+ 3  6</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L172-L185">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.batchseq" href="#Flux.batchseq"><code>Flux.batchseq</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">batchseq(seqs, pad)</code></pre><p>Take a list of <code>N</code> sequences, and turn them into a single sequence where each item is a batch of <code>N</code>. Short sequences will be padded by <code>pad</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.batchseq([[1, 2, 3], [4, 5]], 0)
 3-element Array{Array{Int64,1},1}:
  [1, 4]
  [2, 5]
- [3, 0]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L217-L231">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Base.rpad-Tuple{AbstractArray{T,1} where T,Integer,Any}" href="#Base.rpad-Tuple{AbstractArray{T,1} where T,Integer,Any}"><code>Base.rpad</code></a> — <span class="docstring-category">Method</span></header><section><div><p>Return the given sequence padded with <code>p</code> up to a maximum length of <code>n</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; rpad([1, 2], 4, 0)
+ [3, 0]</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L217-L231">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Base.rpad-Tuple{AbstractArray{T,1} where T,Integer,Any}" href="#Base.rpad-Tuple{AbstractArray{T,1} where T,Integer,Any}"><code>Base.rpad</code></a> — <span class="docstring-category">Method</span></header><section><div><p>Return the given sequence padded with <code>p</code> up to a maximum length of <code>n</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; rpad([1, 2], 4, 0)
 4-element Array{Int64,1}:
  1
  2
@@ -77,15 +77,15 @@ julia&gt; rpad([1, 2, 3], 2, 0)
 3-element Array{Int64,1}:
  1
  2
- 3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L196-L214">source</a></section></article><h2 id="Layer-Initialization-1"><a class="docs-heading-anchor" href="#Layer-Initialization-1">Layer Initialization</a><a class="docs-heading-anchor-permalink" href="#Layer-Initialization-1" title="Permalink"></a></h2><p>These are primarily useful if you are planning to write your own layers. Flux initializes convolutional layers and recurrent cells with <code>glorot_uniform</code> by default. To change the default on an applicable layer, pass the desired function with the <code>init</code> keyword. For example:</p><pre><code class="language-julia-repl">julia&gt; conv = Conv((3, 3), 1 =&gt; 8, relu; init=Flux.glorot_normal)
+ 3</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L196-L214">source</a></section></article><h2 id="Layer-Initialization-1"><a class="docs-heading-anchor" href="#Layer-Initialization-1">Layer Initialization</a><a class="docs-heading-anchor-permalink" href="#Layer-Initialization-1" title="Permalink"></a></h2><p>These are primarily useful if you are planning to write your own layers. Flux initializes convolutional layers and recurrent cells with <code>glorot_uniform</code> by default. To change the default on an applicable layer, pass the desired function with the <code>init</code> keyword. For example:</p><pre><code class="language-julia-repl">julia&gt; conv = Conv((3, 3), 1 =&gt; 8, relu; init=Flux.glorot_normal)
 Conv((3, 3), 1=&gt;8, relu)</code></pre><article class="docstring"><header><a class="docstring-binding" id="Flux.glorot_uniform" href="#Flux.glorot_uniform"><code>Flux.glorot_uniform</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">glorot_uniform(dims...)</code></pre><p>Return an <code>Array</code> of size <code>dims</code> containing random variables taken from a uniform distribution in the interval <span>$[-x, x]$</span>, where <code>x = sqrt(24 / sum(dims)) / 2</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.glorot_uniform(2, 3)
 2×3 Array{Float32,2}:
  0.601094  -0.57414   -0.814925
- 0.900868   0.805994   0.057514</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L7-L20">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.glorot_normal" href="#Flux.glorot_normal"><code>Flux.glorot_normal</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">glorot_normal(dims...)</code></pre><p>Return an <code>Array</code> of size <code>dims</code> containing random variables taken from a normal distribution with mean 0 and standard deviation <code>sqrt(2 / sum(dims))</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.glorot_normal(3, 2)
+ 0.900868   0.805994   0.057514</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L7-L20">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.glorot_normal" href="#Flux.glorot_normal"><code>Flux.glorot_normal</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">glorot_normal(dims...)</code></pre><p>Return an <code>Array</code> of size <code>dims</code> containing random variables taken from a normal distribution with mean 0 and standard deviation <code>sqrt(2 / sum(dims))</code>.</p><p><strong>Examples</strong></p><pre><code class="language-julia-repl">julia&gt; Flux.glorot_normal(3, 2)
 3×2 Array{Float32,2}:
   0.429505  -0.0852891
   0.523935   0.371009
- -0.223261   0.188052</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L23-L37">source</a></section></article><h2 id="Model-Abstraction-1"><a class="docs-heading-anchor" href="#Model-Abstraction-1">Model Abstraction</a><a class="docs-heading-anchor-permalink" href="#Model-Abstraction-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.destructure" href="#Flux.destructure"><code>Flux.destructure</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">destructure(m)</code></pre><p>Flatten a model&#39;s parameters into a single weight vector.</p><pre><code class="language-none">julia&gt; m = Chain(Dense(10, 5, σ), Dense(5, 2), softmax)
+ -0.223261   0.188052</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L23-L37">source</a></section></article><h2 id="Model-Abstraction-1"><a class="docs-heading-anchor" href="#Model-Abstraction-1">Model Abstraction</a><a class="docs-heading-anchor-permalink" href="#Model-Abstraction-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.destructure" href="#Flux.destructure"><code>Flux.destructure</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">destructure(m)</code></pre><p>Flatten a model&#39;s parameters into a single weight vector.</p><pre><code class="language-none">julia&gt; m = Chain(Dense(10, 5, σ), Dense(5, 2), softmax)
 Chain(Dense(10, 5, σ), Dense(5, 2), softmax)
 
 julia&gt; θ, re = destructure(m);
@@ -94,6 +94,6 @@ julia&gt; θ
 67-element Array{Float32,1}:
 -0.1407104
 ...</code></pre><p>The second return value <code>re</code> allows you to reconstruct the original network after making modifications to the weight vector (for example, with a hypernetwork).</p><pre><code class="language-none">julia&gt; re(θ .* 2)
-Chain(Dense(10, 5, σ), Dense(5, 2), softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L249-L269">source</a></section></article><h2 id="Callback-Helpers-1"><a class="docs-heading-anchor" href="#Callback-Helpers-1">Callback Helpers</a><a class="docs-heading-anchor-permalink" href="#Callback-Helpers-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.throttle" href="#Flux.throttle"><code>Flux.throttle</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">throttle(f, timeout; leading=true, trailing=false)</code></pre><p>Return a function that when invoked, will only be triggered at most once during <code>timeout</code> seconds.</p><p>Normally, the throttled function will run as much as it can, without ever going more than once per <code>wait</code> duration; but if you&#39;d like to disable the execution on the leading edge, pass <code>leading=false</code>. To enable execution on the trailing edge, pass <code>trailing=true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/utils.jl#L281-L291">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.stop" href="#Flux.Optimise.stop"><code>Flux.Optimise.stop</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">stop()</code></pre><p>Call <code>Flux.stop()</code> in a callback to indicate when a callback condition is met. This will trigger the train loop to stop and exit.</p><p><strong>Examples</strong></p><pre><code class="language-julia">cb = function ()
+Chain(Dense(10, 5, σ), Dense(5, 2), softmax)</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L249-L269">source</a></section></article><h2 id="Callback-Helpers-1"><a class="docs-heading-anchor" href="#Callback-Helpers-1">Callback Helpers</a><a class="docs-heading-anchor-permalink" href="#Callback-Helpers-1" title="Permalink"></a></h2><article class="docstring"><header><a class="docstring-binding" id="Flux.throttle" href="#Flux.throttle"><code>Flux.throttle</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">throttle(f, timeout; leading=true, trailing=false)</code></pre><p>Return a function that when invoked, will only be triggered at most once during <code>timeout</code> seconds.</p><p>Normally, the throttled function will run as much as it can, without ever going more than once per <code>timeout</code> duration; but if you&#39;d like to disable the execution on the leading edge, pass <code>leading=false</code>. To enable execution on the trailing edge, pass <code>trailing=true</code>.</p></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/utils.jl#L281-L291">source</a></section></article><article class="docstring"><header><a class="docstring-binding" id="Flux.Optimise.stop" href="#Flux.Optimise.stop"><code>Flux.Optimise.stop</code></a> — <span class="docstring-category">Function</span></header><section><div><pre><code class="language-julia">stop()</code></pre><p>Call <code>Flux.stop()</code> in a callback to indicate when a callback condition is met. This will trigger the train loop to stop and exit.</p><p><strong>Examples</strong></p><pre><code class="language-julia">cb = function ()
   accuracy() &gt; 0.9 &amp;&amp; Flux.stop()
-end</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/36d3a9ce998a23a53c0fd93123874b52bd0a3f02/src/optimise/train.jl#L42-L54">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../ecosystem/">« The Julia Ecosystem</a><a class="docs-footer-nextpage" href="../performance/">Performance Tips »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Sunday 10 May 2020 09:08">Sunday 10 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>
+end</code></pre></div><a class="docs-sourcelink" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/a84e08cf287aad2ef8a1977c2eb9239d79d63e4f/src/optimise/train.jl#L42-L54">source</a></section></article></article><nav class="docs-footer"><a class="docs-footer-prevpage" href="../ecosystem/">« The Julia Ecosystem</a><a class="docs-footer-nextpage" href="../performance/">Performance Tips »</a></nav></div><div class="modal" id="documenter-settings"><div class="modal-background"></div><div class="modal-card"><header class="modal-card-head"><p class="modal-card-title">Settings</p><button class="delete"></button></header><section class="modal-card-body"><p><label class="label">Theme</label><div class="select"><select id="documenter-themepicker"><option value="documenter-light">documenter-light</option><option value="documenter-dark">documenter-dark</option></select></div></p><hr/><p>This document was generated with <a href="https://github.com/JuliaDocs/Documenter.jl">Documenter.jl</a> on <span class="colophon-date" title="Tuesday 12 May 2020 15:14">Tuesday 12 May 2020</span>. Using Julia version 1.3.1.</p></section><footer class="modal-card-foot"></footer></div></div></div></body></html>