From c0ea1580e2a676f15f5e4bab549c8fef6d087770 Mon Sep 17 00:00:00 2001
From: autodocs <autodocs>
Date: Wed, 18 Oct 2017 14:33:23 +0000
Subject: [PATCH] build based on fd249b7

---
 latest/community.html           |  2 +-
 latest/data/onehot.html         |  2 +-
 latest/gpu.html                 |  2 +-
 latest/index.html               |  2 +-
 latest/models/basics.html       |  2 +-
 latest/models/layers.html       | 10 ++++++----
 latest/models/recurrence.html   |  4 ++--
 latest/search.html              |  2 +-
 latest/search_index.js          | 34 ++++++++++++++++++++-------------
 latest/training/optimisers.html |  4 ++--
 latest/training/training.html   |  2 +-
 11 files changed, 38 insertions(+), 28 deletions(-)
diff --git a/latest/community.html b/latest/community.html
index 81bee480..554f3857 100644
--- a/latest/community.html
+++ b/latest/community.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li class="current"><a class="toctext" href="community.html">Community</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a href="community.html">Community</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/community.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Community</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Community-1" href="#Community-1">Community</a></h1><p>All Flux users are welcome to join our community on the <a href="https://discourse.julialang.org/">Julia forum</a>, the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning), or Flux&#39;s <a href="https://gitter.im/FluxML/Lobby">Gitter</a>. If you have questions or issues we&#39;ll try to help you out.</p><p>If you&#39;re interested in hacking on Flux, the <a href="https://github.com/FluxML/Flux.jl">source code</a> is open and easy to understand – it&#39;s all just the same Julia code you work with normally. You might be interested in our <a href="https://github.com/FluxML/Flux.jl/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">intro issues</a> to get started.</p><footer><hr/><a class="previous" href="gpu.html"><span class="direction">Previous</span><span class="title">GPU Support</span></a></footer></article></body></html>
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li class="current"><a class="toctext" href="community.html">Community</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a href="community.html">Community</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/community.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Community</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Community-1" href="#Community-1">Community</a></h1><p>All Flux users are welcome to join our community on the <a href="https://discourse.julialang.org/">Julia forum</a>, the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning), or Flux&#39;s <a href="https://gitter.im/FluxML/Lobby">Gitter</a>. If you have questions or issues we&#39;ll try to help you out.</p><p>If you&#39;re interested in hacking on Flux, the <a href="https://github.com/FluxML/Flux.jl">source code</a> is open and easy to understand – it&#39;s all just the same Julia code you work with normally. You might be interested in our <a href="https://github.com/FluxML/Flux.jl/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">intro issues</a> to get started.</p><footer><hr/><a class="previous" href="gpu.html"><span class="direction">Previous</span><span class="title">GPU Support</span></a></footer></article></body></html>
diff --git a/latest/data/onehot.html b/latest/data/onehot.html
index 88cf15ca..1c609bf5 100644
--- a/latest/data/onehot.html
+++ b/latest/data/onehot.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li class="current"><a class="toctext" href="onehot.html">One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="onehot.html">One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It&#39;s common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in &quot;one-of-k&quot; or <a href="https://en.wikipedia.org/wiki/One-hot">&quot;one-hot&quot;</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia&gt; using Flux: onehot
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li class="current"><a class="toctext" href="onehot.html">One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="onehot.html">One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It&#39;s common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in &quot;one-of-k&quot; or <a href="https://en.wikipedia.org/wiki/One-hot">&quot;one-hot&quot;</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia&gt; using Flux: onehot
 
 julia&gt; onehot(:b, [:a, :b, :c])
 3-element Flux.OneHotVector:
diff --git a/latest/gpu.html b/latest/gpu.html
index c66b2b8e..3238787a 100644
--- a/latest/gpu.html
+++ b/latest/gpu.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li class="current"><a class="toctext" href="gpu.html">GPU Support</a><ul class="internal"></ul></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="gpu.html">GPU Support</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/gpu.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>GPU Support</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="GPU-Support-1" href="#GPU-Support-1">GPU Support</a></h1><p>Support for array operations on other hardware backends, like GPUs, is provided by external packages like <a href="https://github.com/JuliaGPU/CuArrays.jl">CuArrays</a> and <a href="https://github.com/JuliaGPU/CLArrays.jl">CLArrays</a>. Flux doesn&#39;t care what array type you use, so we can just plug these in without any other changes.</p><p>For example, we can use <code>CuArrays</code> (with the <code>cu</code> converter) to run our <a href="models/basics.html">basic example</a> on an NVIDIA GPU.</p><pre><code class="language-julia">using CuArrays
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li class="current"><a class="toctext" href="gpu.html">GPU Support</a><ul class="internal"></ul></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="gpu.html">GPU Support</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/gpu.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>GPU Support</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="GPU-Support-1" href="#GPU-Support-1">GPU Support</a></h1><p>Support for array operations on other hardware backends, like GPUs, is provided by external packages like <a href="https://github.com/JuliaGPU/CuArrays.jl">CuArrays</a> and <a href="https://github.com/JuliaGPU/CLArrays.jl">CLArrays</a>. Flux doesn&#39;t care what array type you use, so we can just plug these in without any other changes.</p><p>For example, we can use <code>CuArrays</code> (with the <code>cu</code> converter) to run our <a href="models/basics.html">basic example</a> on an NVIDIA GPU.</p><pre><code class="language-julia">using CuArrays
 
 W = cu(rand(2, 5)) # a 2×5 CuArray
 b = cu(rand(2))
diff --git a/latest/index.html b/latest/index.html
index 97873719..b9248ded 100644
--- a/latest/index.html
+++ b/latest/index.html
@@ -6,5 +6,5 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li class="current"><a class="toctext" href="index.html">Home</a><ul class="internal"><li class="toplevel"><a class="toctext" href="#Installation-1">Installation</a></li></ul></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="index.html">Home</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/index.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Home</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Flux:-The-Julia-Machine-Learning-Library-1" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the <a href="https://github.com/FluxML/CuArrays.jl">GPU kernels</a>) and any part can be tweaked to your liking.</p><h1><a class="nav-anchor" id="Installation-1" href="#Installation-1">Installation</a></h1><p>Install <a href="https://julialang.org/downloads/">Julia 0.6.0 or later</a>, if you haven&#39;t already.</p><pre><code class="language-julia">Pkg.add(&quot;Flux&quot;)
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li class="current"><a class="toctext" href="index.html">Home</a><ul class="internal"><li class="toplevel"><a class="toctext" href="#Installation-1">Installation</a></li></ul></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="index.html">Home</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/index.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Home</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Flux:-The-Julia-Machine-Learning-Library-1" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the <a href="https://github.com/FluxML/CuArrays.jl">GPU kernels</a>) and any part can be tweaked to your liking.</p><h1><a class="nav-anchor" id="Installation-1" href="#Installation-1">Installation</a></h1><p>Install <a href="https://julialang.org/downloads/">Julia 0.6.0 or later</a>, if you haven&#39;t already.</p><pre><code class="language-julia">Pkg.add(&quot;Flux&quot;)
 Pkg.test(&quot;Flux&quot;) # Check things installed correctly</code></pre><p>Start with the <a href="models/basics.html">basics</a>. The <a href="https://github.com/FluxML/model-zoo/">model zoo</a> is also a good starting point for many common kinds of models.</p><footer><hr/><a class="next" href="models/basics.html"><span class="direction">Next</span><span class="title">Basics</span></a></footer></article></body></html>
diff --git a/latest/models/basics.html b/latest/models/basics.html
index ec97d6f6..d70ca15e 100644
--- a/latest/models/basics.html
+++ b/latest/models/basics.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href="basics.html">Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li></ul></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="basics.html">Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>. (It&#39;s a good idea to follow this example in the Julia repl.)</p><pre><code class="language-julia">W = rand(2, 5)
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href="basics.html">Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li></ul></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li><a class="toctext" href="layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="basics.html">Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>. (It&#39;s a good idea to follow this example in the Julia repl.)</p><pre><code class="language-julia">W = rand(2, 5)
 b = rand(2)
 
 predict(x) = W*x .+ b
diff --git a/latest/models/layers.html b/latest/models/layers.html
index 79da5315..cb85de10 100644
--- a/latest/models/layers.html
+++ b/latest/models/layers.html
@@ -1,20 +1,22 @@
 <!DOCTYPE html>
-<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Layer Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Model Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
 (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
 m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li class="current"><a class="toctext" href="layers.html">Layer Reference</a><ul class="internal"><li><a class="toctext" href="#Model-Layers-1">Model Layers</a></li><li><a class="toctext" href="#Activation-Functions-1">Activation Functions</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="layers.html">Layer Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Layer Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Model-Layers-1" href="#Model-Layers-1">Model Layers</a></h2><p>These core layers form the foundation of almost all neural networks.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a> — <span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in sequence on a given input.</p><pre><code class="language-none">m = Chain(x -&gt; x^2, x -&gt; x+1)
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li class="current"><a class="toctext" href="layers.html">Model Reference</a><ul class="internal"><li><a class="toctext" href="#Layers-1">Layers</a></li><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Activation-Functions-1">Activation Functions</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="layers.html">Model Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Model Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Layers-1" href="#Layers-1">Layers</a></h2><p>These core layers form the foundation of almost all neural networks.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a> — <span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in sequence on a given input.</p><pre><code class="language-none">m = Chain(x -&gt; x^2, x -&gt; x+1)
 m(5) == 26
 
 m = Chain(Dense(10, 5), Dense(5, 2))
 x = rand(10)
-m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/layers/basic.jl#L1-L16">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a> — <span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>out</code>.</p><pre><code class="language-none">julia&gt; d = Dense(5, 2)
+m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/layers/basic.jl#L1-L16">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a> — <span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>out</code>.</p><pre><code class="language-none">julia&gt; d = Dense(5, 2)
 Dense(5, 2)
 
 julia&gt; d(rand(5))
 Tracked 2-element Array{Float64,1}:
   0.00257447
-  -0.00449443</code></pre></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/layers/basic.jl#L38-L55">source</a></section><h2><a class="nav-anchor" id="Activation-Functions-1" href="#Activation-Functions-1">Activation Functions</a></h2><p>Non-linearities that go between layers of your model. Most of these functions are defined in <a href="https://github.com/FluxML/NNlib.jl">NNlib</a> but are available by default in Flux.</p><p>Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call <code>σ.(xs)</code>, <code>relu.(xs)</code> and so on.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.σ" href="#NNlib.σ"><code>NNlib.σ</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">σ(x) = 1 / (1 + exp(-x))</code></pre><p>Classic <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> activation function.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L1-L6">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.relu" href="#NNlib.relu"><code>NNlib.relu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">relu(x) = max(0, x)</code></pre><p><a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L12-L17">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.leakyrelu" href="#NNlib.leakyrelu"><code>NNlib.leakyrelu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">leakyrelu(x) = max(0.01x, x)</code></pre><p>Leaky <a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function.</p><p>You can also specify the coefficient explicitly, e.g. <code>leakyrelu(x, 0.01)</code>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L20-L27">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.elu" href="#NNlib.elu"><code>NNlib.elu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">elu(x; α = 1) = x &gt; 0 ? x : α * (exp(x) - one(x)</code></pre><p>Exponential Linear Unit activation function. See <a href="https://arxiv.org/abs/1511.07289">Fast and Accurate Deep Network Learning by Exponential Linear Units</a></p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L30-L35">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.swish" href="#NNlib.swish"><code>NNlib.swish</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">swish(x) = x * σ(x)</code></pre><p>Self-gated actvation function.</p><p>See <a href="https://arxiv.org/pdf/1710.05941.pdf">Swish: a Self-Gated Activation Function</a>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L38-L44">source</a></section><footer><hr/><a class="previous" href="recurrence.html"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../training/optimisers.html"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>
+  -0.00449443</code></pre></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/layers/basic.jl#L38-L55">source</a></section><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>Much like the core layers above, but can be used to process sequence data (as well as other kinds of structured data).</p><pre><code class="language-none">RNN
+LSTM
+Recur</code></pre><h2><a class="nav-anchor" id="Activation-Functions-1" href="#Activation-Functions-1">Activation Functions</a></h2><p>Non-linearities that go between layers of your model. Most of these functions are defined in <a href="https://github.com/FluxML/NNlib.jl">NNlib</a> but are available by default in Flux.</p><p>Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call <code>σ.(xs)</code>, <code>relu.(xs)</code> and so on.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.σ" href="#NNlib.σ"><code>NNlib.σ</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">σ(x) = 1 / (1 + exp(-x))</code></pre><p>Classic <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> activation function.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L1-L6">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.relu" href="#NNlib.relu"><code>NNlib.relu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">relu(x) = max(0, x)</code></pre><p><a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L12-L17">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.leakyrelu" href="#NNlib.leakyrelu"><code>NNlib.leakyrelu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">leakyrelu(x) = max(0.01x, x)</code></pre><p>Leaky <a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function.</p><p>You can also specify the coefficient explicitly, e.g. <code>leakyrelu(x, 0.01)</code>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L20-L27">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.elu" href="#NNlib.elu"><code>NNlib.elu</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">elu(x; α = 1) = x &gt; 0 ? x : α * (exp(x) - one(x)</code></pre><p>Exponential Linear Unit activation function. See <a href="https://arxiv.org/abs/1511.07289">Fast and Accurate Deep Network Learning by Exponential Linear Units</a></p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L30-L35">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.swish" href="#NNlib.swish"><code>NNlib.swish</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">swish(x) = x * σ(x)</code></pre><p>Self-gated actvation function.</p><p>See <a href="https://arxiv.org/pdf/1710.05941.pdf">Swish: a Self-Gated Activation Function</a>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/NNlib.jl/blob/e4b48c1f41b2786ae5d1efef1ba54ff82eeeb49c/src/activation.jl#L38-L44">source</a></section><footer><hr/><a class="previous" href="recurrence.html"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../training/optimisers.html"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>
diff --git a/latest/models/recurrence.html b/latest/models/recurrence.html
index 871bae8c..9b2eb2d9 100644
--- a/latest/models/recurrence.html
+++ b/latest/models/recurrence.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li class="current"><a class="toctext" href="recurrence.html">Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li><li><a class="toctext" href="#Truncating-Gradients-1">Truncating Gradients</a></li></ul></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="recurrence.html">Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li class="current"><a class="toctext" href="recurrence.html">Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li><li><a class="toctext" href="#Truncating-Gradients-1">Truncating Gradients</a></li></ul></li><li><a class="toctext" href="layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="recurrence.html">Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
 y₂ = f(x₂)
 y₃ = f(x₃)
 # ...</code></pre><p>Recurrent networks introduce a <em>hidden state</em> that gets carried over each time we run the model. The model now takes the old <code>h</code> as an input, and produces a new <code>h</code> as output, each time we run it.</p><pre><code class="language-julia">h = # ... initial state ...
@@ -39,4 +39,4 @@ m = Flux.Recur(rnn, h)
 
 y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p><p>If you use the <code>RNN(10, 5)</code> constructor – as opposed to <code>RNNCell</code> – you&#39;ll see that it&#39;s simply a wrapped cell.</p><pre><code class="language-julia">julia&gt; RNN(10, 5)
 Recur(RNNCell(Dense(15, 5)))</code></pre><h2><a class="nav-anchor" id="Sequences-1" href="#Sequences-1">Sequences</a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">m.(seq) # returns a list of 5-element vectors</code></pre><p>This works even when we&#39;ve chain recurrent layers into a larger model.</p><pre><code class="language-julia">m = Chain(LSTM(10, 15), Dense(15, 5))
-m.(seq)</code></pre><h2><a class="nav-anchor" id="Truncating-Gradients-1" href="#Truncating-Gradients-1">Truncating Gradients</a></h2><p>By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling <code>back!</code> will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients – this accumulates and quickly becomes expensive.</p><p>To avoid this we can <em>truncate</em> the gradient calculation, forgetting the history.</p><pre><code class="language-julia">truncate!(m)</code></pre><p>Calling <code>truncate!</code> wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation.</p><footer><hr/><a class="previous" href="basics.html"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="layers.html"><span class="direction">Next</span><span class="title">Layer Reference</span></a></footer></article></body></html>
+m.(seq)</code></pre><h2><a class="nav-anchor" id="Truncating-Gradients-1" href="#Truncating-Gradients-1">Truncating Gradients</a></h2><p>By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling <code>back!</code> will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients – this accumulates and quickly becomes expensive.</p><p>To avoid this we can <em>truncate</em> the gradient calculation, forgetting the history.</p><pre><code class="language-julia">truncate!(m)</code></pre><p>Calling <code>truncate!</code> wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation.</p><footer><hr/><a class="previous" href="basics.html"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="layers.html"><span class="direction">Next</span><span class="title">Model Reference</span></a></footer></article></body></html>
diff --git a/latest/search.html b/latest/search.html
index af1c20e5..704545f3 100644
--- a/latest/search.html
+++ b/latest/search.html
@@ -6,4 +6,4 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="search_index.js"></script><script src="assets/search.js"></script></html>
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="gpu.html">GPU Support</a></li><li><a class="toctext" href="community.html">Community</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="search_index.js"></script><script src="assets/search.js"></script></html>
diff --git a/latest/search_index.js b/latest/search_index.js
index 1c9b6a26..58cb914c 100644
--- a/latest/search_index.js
+++ b/latest/search_index.js
@@ -114,15 +114,15 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#",
-    "page": "Layer Reference",
-    "title": "Layer Reference",
+    "page": "Model Reference",
+    "title": "Model Reference",
     "category": "page",
     "text": ""
 },
 
 {
     "location": "models/layers.html#Flux.Chain",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "Flux.Chain",
     "category": "Type",
     "text": "Chain(layers...)\n\nChain multiple layers / functions together, so that they are called in sequence on a given input.\n\nm = Chain(x -> x^2, x -> x+1)\nm(5) == 26\n\nm = Chain(Dense(10, 5), Dense(5, 2))\nx = rand(10)\nm(x) == m[2](m[1](x))\n\nChain also supports indexing and slicing, e.g. m[2] or m[1:end-1]. m[1:3](x) will calculate the output of the first three layers.\n\n\n\n"
@@ -130,23 +130,31 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#Flux.Dense",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "Flux.Dense",
     "category": "Type",
     "text": "Dense(in::Integer, out::Integer, σ = identity)\n\nCreates a traditional Dense layer with parameters W and b.\n\ny = σ.(W * x .+ b)\n\nThe input x must be a vector of length in, or a batch of vectors represented as an in × N matrix. The out y will be a vector or batch of length out.\n\njulia> d = Dense(5, 2)\nDense(5, 2)\n\njulia> d(rand(5))\nTracked 2-element Array{Float64,1}:\n  0.00257447\n  -0.00449443\n\n\n\n"
 },
 
 {
-    "location": "models/layers.html#Model-Layers-1",
-    "page": "Layer Reference",
-    "title": "Model Layers",
+    "location": "models/layers.html#Layers-1",
+    "page": "Model Reference",
+    "title": "Layers",
     "category": "section",
     "text": "These core layers form the foundation of almost all neural networks.Chain\nDense"
 },
 
+{
+    "location": "models/layers.html#Recurrent-Cells-1",
+    "page": "Model Reference",
+    "title": "Recurrent Cells",
+    "category": "section",
+    "text": "Much like the core layers above, but can be used to process sequence data (as well as other kinds of structured data).RNN\nLSTM\nRecur"
+},
+
 {
     "location": "models/layers.html#NNlib.σ",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "NNlib.σ",
     "category": "Function",
     "text": "σ(x) = 1 / (1 + exp(-x))\n\nClassic sigmoid activation function.\n\n\n\n"
@@ -154,7 +162,7 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#NNlib.relu",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "NNlib.relu",
     "category": "Function",
     "text": "relu(x) = max(0, x)\n\nRectified Linear Unit activation function.\n\n\n\n"
@@ -162,7 +170,7 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#NNlib.leakyrelu",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "NNlib.leakyrelu",
     "category": "Function",
     "text": "leakyrelu(x) = max(0.01x, x)\n\nLeaky Rectified Linear Unit activation function.\n\nYou can also specify the coefficient explicitly, e.g. leakyrelu(x, 0.01).\n\n\n\n"
@@ -170,7 +178,7 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#NNlib.elu",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "NNlib.elu",
     "category": "Function",
     "text": "elu(x; α = 1) = x > 0 ? x : α * (exp(x) - one(x)\n\nExponential Linear Unit activation function. See Fast and Accurate Deep Network Learning by Exponential Linear Units\n\n\n\n"
@@ -178,7 +186,7 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#NNlib.swish",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "NNlib.swish",
     "category": "Function",
     "text": "swish(x) = x * σ(x)\n\nSelf-gated actvation function.\n\nSee Swish: a Self-Gated Activation Function.\n\n\n\n"
@@ -186,7 +194,7 @@ var documenterSearchIndex = {"docs": [
 
 {
     "location": "models/layers.html#Activation-Functions-1",
-    "page": "Layer Reference",
+    "page": "Model Reference",
     "title": "Activation Functions",
     "category": "section",
     "text": "Non-linearities that go between layers of your model. Most of these functions are defined in NNlib but are available by default in Flux.Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call σ.(xs), relu.(xs) and so on.σ\nrelu\nleakyrelu\nelu\nswish"
diff --git a/latest/training/optimisers.html b/latest/training/optimisers.html
index 239b3f62..51268454 100644
--- a/latest/training/optimisers.html
+++ b/latest/training/optimisers.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href="optimisers.html">Optimisers</a><ul class="internal"><li><a class="toctext" href="#Optimiser-Reference-1">Optimiser Reference</a></li></ul></li><li><a class="toctext" href="training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="optimisers.html">Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../models/basics.html">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">W = param(rand(2, 5))
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href="optimisers.html">Optimisers</a><ul class="internal"><li><a class="toctext" href="#Optimiser-Reference-1">Optimiser Reference</a></li></ul></li><li><a class="toctext" href="training.html">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="optimisers.html">Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../models/basics.html">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">W = param(rand(2, 5))
 b = param(rand(2))
 
 predict(x) = W*x .+ b
@@ -27,4 +27,4 @@ end</code></pre><p>If we call <code>update</code>, the parameters <code>W</code>
   Dense(10, 5, σ),
   Dense(5, 2), softmax)</code></pre><p>Instead of having to write <code>[m[1].W, m[1].b, ...]</code>, Flux provides a params function <code>params(m)</code> that returns a list of all parameters in the model for you.</p><p>For the update step, there&#39;s nothing whatsoever wrong with writing the loop above – it&#39;ll work just fine – but Flux provides various <em>optimisers</em> that make it more convenient.</p><pre><code class="language-julia">opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1
 
-opt() # Carry out the update, modifying `W` and `b`.</code></pre><p>An optimiser takes a parameter list and returns a function that does the same thing as <code>update</code> above. We can pass either <code>opt</code> or <code>update</code> to our <a href="training.html">training loop</a>, which will then run the optimiser after every mini-batch of data.</p><h2><a class="nav-anchor" id="Optimiser-Reference-1" href="#Optimiser-Reference-1">Optimiser Reference</a></h2><p>All optimisers return a function that, when called, will update the parameters passed to it.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.SGD" href="#Flux.Optimise.SGD"><code>Flux.Optimise.SGD</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">SGD(params, η = 1; decay = 0)</code></pre><p>Classic gradient descent optimiser. For each parameter <code>p</code> and its gradient <code>δp</code>, this runs <code>p -= η*δp</code>.</p><p>Supports decayed learning rate decay if the <code>decay</code> argument is provided.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L12-L19">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Momentum" href="#Flux.Optimise.Momentum"><code>Flux.Optimise.Momentum</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">Momentum(params, ρ, decay = 0)</code></pre><p>SGD with momentum <code>ρ</code> and optional learning rate decay.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L23-L27">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Nesterov" href="#Flux.Optimise.Nesterov"><code>Flux.Optimise.Nesterov</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">Nesterov(params, ρ, decay = 0)</code></pre><p>SGD with Nesterov momentum <code>ρ</code> and optional learning rate decay.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L31-L35">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.RMSProp" href="#Flux.Optimise.RMSProp"><code>Flux.Optimise.RMSProp</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">RMSProp(params; η = 0.001, ρ = 0.9, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">RMSProp</a> optimiser. Parameters other than learning rate don&#39;t need tuning. Often a good choice for recurrent networks.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L39-L45">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAM" href="#Flux.Optimise.ADAM"><code>Flux.Optimise.ADAM</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADAM(params; η = 0.001, β1 = 0.9, β2 = 0.999, ϵ = 1e-08, decay = 0)</code></pre><p><a href="https://arxiv.org/abs/1412.6980v8">ADAM</a> optimiser.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L49-L53">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAGrad" href="#Flux.Optimise.ADAGrad"><code>Flux.Optimise.ADAGrad</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADAGrad(params; η = 0.01, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">ADAGrad</a> optimiser. Parameters don&#39;t need tuning.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L57-L62">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADADelta" href="#Flux.Optimise.ADADelta"><code>Flux.Optimise.ADADelta</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADADelta(params; η = 0.01, ρ = 0.95, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://arxiv.org/abs/1212.5701">ADADelta</a> optimiser. Parameters don&#39;t need tuning.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/897f812055ac5b4e863219276b3f1325d6909904/src/optimise/interface.jl#L66-L71">source</a></section><footer><hr/><a class="previous" href="../models/layers.html"><span class="direction">Previous</span><span class="title">Layer Reference</span></a><a class="next" href="training.html"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>
+opt() # Carry out the update, modifying `W` and `b`.</code></pre><p>An optimiser takes a parameter list and returns a function that does the same thing as <code>update</code> above. We can pass either <code>opt</code> or <code>update</code> to our <a href="training.html">training loop</a>, which will then run the optimiser after every mini-batch of data.</p><h2><a class="nav-anchor" id="Optimiser-Reference-1" href="#Optimiser-Reference-1">Optimiser Reference</a></h2><p>All optimisers return a function that, when called, will update the parameters passed to it.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.SGD" href="#Flux.Optimise.SGD"><code>Flux.Optimise.SGD</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">SGD(params, η = 1; decay = 0)</code></pre><p>Classic gradient descent optimiser. For each parameter <code>p</code> and its gradient <code>δp</code>, this runs <code>p -= η*δp</code>.</p><p>Supports decayed learning rate decay if the <code>decay</code> argument is provided.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L12-L19">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Momentum" href="#Flux.Optimise.Momentum"><code>Flux.Optimise.Momentum</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">Momentum(params, ρ, decay = 0)</code></pre><p>SGD with momentum <code>ρ</code> and optional learning rate decay.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L23-L27">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Nesterov" href="#Flux.Optimise.Nesterov"><code>Flux.Optimise.Nesterov</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">Nesterov(params, ρ, decay = 0)</code></pre><p>SGD with Nesterov momentum <code>ρ</code> and optional learning rate decay.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L31-L35">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.RMSProp" href="#Flux.Optimise.RMSProp"><code>Flux.Optimise.RMSProp</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">RMSProp(params; η = 0.001, ρ = 0.9, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">RMSProp</a> optimiser. Parameters other than learning rate don&#39;t need tuning. Often a good choice for recurrent networks.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L39-L45">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAM" href="#Flux.Optimise.ADAM"><code>Flux.Optimise.ADAM</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADAM(params; η = 0.001, β1 = 0.9, β2 = 0.999, ϵ = 1e-08, decay = 0)</code></pre><p><a href="https://arxiv.org/abs/1412.6980v8">ADAM</a> optimiser.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L49-L53">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAGrad" href="#Flux.Optimise.ADAGrad"><code>Flux.Optimise.ADAGrad</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADAGrad(params; η = 0.01, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">ADAGrad</a> optimiser. Parameters don&#39;t need tuning.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L57-L62">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADADelta" href="#Flux.Optimise.ADADelta"><code>Flux.Optimise.ADADelta</code></a> — <span class="docstring-category">Function</span>.</div><div><pre><code class="language-none">ADADelta(params; η = 0.01, ρ = 0.95, ϵ = 1e-8, decay = 0)</code></pre><p><a href="http://arxiv.org/abs/1212.5701">ADADelta</a> optimiser. Parameters don&#39;t need tuning.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/fd249b773e6ba1b6f0846a6c2400475f348df87b/src/optimise/interface.jl#L66-L71">source</a></section><footer><hr/><a class="previous" href="../models/layers.html"><span class="direction">Previous</span><span class="title">Model Reference</span></a><a class="next" href="training.html"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>
diff --git a/latest/training/training.html b/latest/training/training.html
index 28215d8b..9093eec8 100644
--- a/latest/training/training.html
+++ b/latest/training/training.html
@@ -6,7 +6,7 @@ m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 
 ga('create', 'UA-36890222-9', 'auto');
 ga('send', 'pageview');
-</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="optimisers.html">Optimisers</a></li><li class="current"><a class="toctext" href="training.html">Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Datasets-1">Datasets</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="training.html">Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need three things:</p><ul><li><p>A <em>model loss function</em>, that evaluates how well a model is doing given some input data.</p></li><li><p>A collection of data points that will be provided to the loss function.</p></li><li><p>An <a href="optimisers.html">optimiser</a> that will update the model parameters appropriately.</p></li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(modelLoss, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" href="#Loss-Functions-1">Loss Functions</a></h2><p>The <code>loss</code> that we defined in <a href="../models/basics.html">basics</a> is completely valid for training. We can also define a loss in terms of some model:</p><pre><code class="language-julia">m = Chain(
+</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="optimisers.html">Optimisers</a></li><li class="current"><a class="toctext" href="training.html">Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Datasets-1">Datasets</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu.html">GPU Support</a></li><li><a class="toctext" href="../community.html">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="training.html">Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need three things:</p><ul><li><p>A <em>model loss function</em>, that evaluates how well a model is doing given some input data.</p></li><li><p>A collection of data points that will be provided to the loss function.</p></li><li><p>An <a href="optimisers.html">optimiser</a> that will update the model parameters appropriately.</p></li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(modelLoss, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" href="#Loss-Functions-1">Loss Functions</a></h2><p>The <code>loss</code> that we defined in <a href="../models/basics.html">basics</a> is completely valid for training. We can also define a loss in terms of some model:</p><pre><code class="language-julia">m = Chain(
   Dense(784, 32, σ),
   Dense(32, 10), softmax)