build based on 5f24d61

This commit is contained in:
autodocs 2017-09-11 13:28:47 +00:00
parent 574a0249aa
commit ce348d0845
54 changed files with 3243 additions and 3672 deletions

View File

@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="16.5mm"
height="8.6603003mm"
viewBox="0 0 58.464567 30.686103"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
sodipodi:docname="arrow.svg">
<defs
id="defs4" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="11.2"
inkscape:cx="14.209234"
inkscape:cy="29.780479"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1053"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-1021.6761)">
<path
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 0,1021.6761 35.433071,0 -17.716536,30.6861 z"
id="path4140"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccc" />
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

@ -0,0 +1,541 @@
/*
* The default CSS style for Documenter.jl generated sites
*
* Heavily inspired by the Julia Sphinx theme
* https://github.com/JuliaLang/JuliaDoc
* which extends the sphinx_rtd_theme
* https://github.com/snide/sphinx_rtd_theme
*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
/* fonts */
body, input {
font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif;
font-size: 16px;
color: #222;
text-rendering: optimizeLegibility;
}
pre, code {
font-family: 'Roboto Mono', Monaco, courier, monospace;
font-size: 0.90em;
}
pre code {
font-size: 1em;
}
a {
color: #2980b9;
text-decoration: none;
}
a:hover {
color: #3091d1;
}
a:visited {
color: #9b59b6;
}
body {
line-height: 1.5;
}
h1 { font-size: 1.75em; }
h2 { font-size: 1.50em; }
h3 { font-size: 1.25em; }
h4 { font-size: 1.15em; }
h5 { font-size: 1.10em; }
h6 { font-size: 1em; }
h4, h5, h6 {
margin: 1em 0;
}
img {
max-width: 100%;
}
table {
border-collapse: collapse;
margin: 1em 0;
}
th, td {
border: 1px solid #e1e4e5;
padding: 0.5em 1em;
}
th {
border-bottom-width: 2px;
}
tr:nth-child(even) {
background-color: #f3f6f6;
}
hr {
border: 0;
border-top: 1px solid #e5e5e5;
}
/* Inline code and code blocks */
code {
padding: 0.1em;
background-color: rgba(0,0,0,.04);
border-radius: 3px;
}
pre {
background-color: #f5f5f5;
border: 1px solid #dddddd;
border-radius: 3px;
padding: 0.5em;
overflow: auto;
}
pre code {
padding: 0;
background-color: initial;
}
/* Headers in admonitions and docstrings */
.admonition h1,
article section.docstring h1 {
font-size: 1.25em;
}
.admonition h2,
article section.docstring h2 {
font-size: 1.10em;
}
.admonition h3,
.admonition h4,
.admonition h5,
.admonition h6,
article section.docstring h3,
article section.docstring h4,
article section.docstring h5,
article section.docstring h6 {
font-size: 1em;
}
/* Navigation */
nav.toc {
position: fixed;
top: 0;
left: 0;
bottom: 0;
width: 20em;
overflow-y: auto;
padding: 1em 0;
background-color: #fcfcfc;
box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
}
nav.toc .logo {
margin: 0 auto;
display: block;
max-height: 6em;
max-width: 18em;
}
nav.toc h1 {
text-align: center;
margin-top: .57em;
margin-bottom: 0;
}
/* The version-selector dropdown in the sidebar. */
nav.toc select {
    display: block;
    height: 2em;
    padding: 0 1.6em 0 1em;
    min-width: 7em;
    /* Plain-percentage fallback first, then calc() for browsers that support it. */
    max-width: 90%;
    max-width: calc(100% - 5em);
    margin: 0 auto;
    font-size: .83em;
    border: 1px solid #c9c9c9;
    border-radius: 1em;
    /* TODO: doesn't seem to be centered on Safari */
    text-align: center;
    text-align-last: center;
    /* Suppress the native dropdown arrow and draw our own (arrow.svg),
       right-aligned inside the control. */
    appearance: none;
    -moz-appearance: none;
    -webkit-appearance: none;
    background: white url("arrow.svg");
    background-size: 1.155em;
    background-repeat: no-repeat;
    background-position: right;
}
nav.toc select:hover {
border: 1px solid #a0a0a0;
}
nav.toc select option {
text-align: center;
}
nav.toc input {
display: block;
height: 2em;
width: 90%;
width: calc(100% - 5em);
margin: 1.2em auto;
padding: 0 1em;
border: 1px solid #c9c9c9;
border-radius: 1em;
font-size: .83em;
}
nav.toc > ul * {
margin: 0;
}
nav.toc ul {
color: #404040;
padding: 0;
list-style: none;
}
nav.toc ul .toctext {
color: inherit;
display: block;
}
nav.toc ul a:hover {
color: #fcfcfc;
background-color: #4e4a4a;
}
nav.toc ul.internal a {
color: inherit;
display: block;
}
nav.toc ul.internal a:hover {
background-color: #d6d6d6;
}
nav.toc ul.internal {
background-color: #e3e3e3;
box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
list-style: none;
}
nav.toc ul.internal li.toplevel {
border-top: 1px solid #c9c9c9;
font-weight: bold;
}
nav.toc ul.internal li.toplevel:first-child {
border-top: none;
}
nav.toc .toctext {
padding-top: 0.3em;
padding-bottom: 0.3em;
padding-right: 1em;
}
nav.toc ul .toctext {
padding-left: 1em;
}
nav.toc ul ul .toctext {
padding-left: 2em;
}
nav.toc ul ul ul .toctext {
padding-left: 3em;
}
nav.toc li.current > .toctext {
border-top: 1px solid #c9c9c9;
border-bottom: 1px solid #c9c9c9;
color: #404040;
font-weight: bold;
background-color: white;
}
article {
margin-left: 20em;
min-width: 20em;
max-width: 48em;
padding: 2em;
}
article > header {}
article > header div#topbar {
display: none;
}
article > header nav ul {
display: inline-block;
list-style: none;
margin: 0;
padding: 0;
}
article > header nav li {
display: inline-block;
padding-right: 0.2em;
}
article > header nav li:before {
content: "»";
padding-right: 0.2em;
}
article > header .edit-page {
float: right;
}
article > footer {}
article > footer a.prev {
float: left;
}
article > footer a.next {
float: right;
}
article > footer a .direction:after {
content: ": ";
}
article hr {
margin: 1em 0;
}
article section.docstring {
border: 1px solid #ddd;
margin: 0.5em 0;
padding: 0.5em;
border-radius: 3px;
}
article section.docstring .docstring-header {
margin-bottom: 1em;
}
article section.docstring .docstring-binding {
color: #333;
font-weight: bold;
}
article section.docstring .docstring-category {
font-style: italic;
}
article section.docstring a.source-link {
float: left;
font-weight: bold;
}
.nav-anchor,
.nav-anchor:hover,
.nav-anchor:visited {
color: #333;
}
/*
* Admonitions
*
* Colors (title, body)
* warning: #f0b37e #ffedcc (orange)
* note: #6ab0de #e7f2fa (blue)
* tip: #1abc9c #dbfaf4 (green)
*/
.admonition {
border-radius: 3px;
background-color: #eeeeee;
}
.admonition-title {
border-radius: 3px 3px 0 0;
background-color: #9b9b9b;
padding: 0.15em 0.5em;
}
.admonition-text {
padding: 0.5em;
}
.admonition-text > :first-child {
margin-top: 0;
}
.admonition-text > :last-child {
margin-bottom: 0;
}
.admonition > .admonition-title:before {
font-family: "FontAwesome";
margin-right: 5px;
content: "\f06a";
}
.admonition.warning > .admonition-title {
background-color: #f0b37e;
}
.admonition.warning {
background-color: #ffedcc;
}
.admonition.note > .admonition-title {
background-color: #6ab0de;
}
.admonition.note {
background-color: #e7f2fa;
}
.admonition.tip > .admonition-title {
background-color: #1abc9c;
}
.admonition.tip {
background-color: #dbfaf4;
}
/* footnotes */
.footnote {
padding-left: 0.8em;
border-left: 2px solid #ccc;
}
/* Search page */
#search-results .category {
font-size: smaller;
}
#search-results .category:before {
content: " ";
}
/* Overriding the <code> block style of highligh.js.
* We have to override the padding and the background-color, since we style this
* part ourselves. Specifically, we style the <pre> surrounding the <code>, while
* highlight.js applies the .hljs style directly to the <code> tag.
*/
.hljs {
background-color: transparent;
padding: 0;
}
@media only screen and (max-width: 768px) {
nav.toc {
position: fixed;
overflow-y: scroll;
width: 16em;
left: -16em;
-webkit-overflow-scrolling: touch;
-webkit-transition-property: left; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: left;
transition-duration: 0.3s;
-webkit-transition-timing-function: ease-out; /* Safari */
transition-timing-function: ease-out;
z-index: 2;
}
nav.toc.show {
left: 0;
}
article {
margin-left: 0;
padding: 3em 0.9em 0 0.9em; /* top right bottom left */
overflow-wrap: break-word;
}
article > header {
position: fixed;
left: 0;
z-index: 1;
}
/* On small screens the fixed #topbar replaces the breadcrumb nav and its
 * divider, so hide both. NOTE(review): the original selector list was
 * `article > header nav, hr`, whose bare `hr` hid every horizontal rule on
 * the page (including `article hr`, styled above) — the second selector is
 * now scoped to the header, which matches the apparent intent. */
article > header nav, article > header hr {
    display: none;
}
article > header div#topbar {
display: block; /* is mobile */
position: fixed;
width: 100%;
height: 1.5em;
padding-top: 1em;
padding-bottom: 1em;
background-color: #fcfcfc;
box-shadow: 0 1px 3px rgba(0,0,0,.26);
top: 0;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: top;
transition-duration: 0.3s;
}
article > header div#topbar.headroom--unpinned.headroom--not-top.headroom--not-bottom {
top: -4em;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.7s; /* Safari */
transition-property: top;
transition-duration: 0.7s;
}
article > header div#topbar span {
position: fixed;
width: 80%;
height: 1.5em;
margin-top: -0.1em;
margin-left: 0.9em;
font-size: 1.2em;
overflow: hidden;
}
article > header div#topbar a.fa-bars {
float: right;
padding: 0.6em;
margin-top: -0.6em;
margin-right: 0.3em;
font-size: 1.5em;
}
article > header div#topbar a.fa-bars:visited {
color: #3091d1;
}
article table {
overflow-x: auto;
display: block;
}
article div.MathJax_Display {
overflow: scroll;
}
article span.MathJax {
overflow: hidden;
}
}
@media only screen and (max-width: 320px) {
body {
font-size: 15px;
}
}

View File

@ -0,0 +1,129 @@
/*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
// Configure RequireJS module paths for the external assets used by the
// generated documentation; everything is fetched from CDNs.
requirejs.config({
    paths: {
        'jquery': 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min',
        'jqueryui': 'https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.12.0/jquery-ui.min',
        'headroom': 'https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.3/headroom.min',
        'mathjax': 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML',
        'highlight': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min',
        'highlight-julia': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia.min',
        'highlight-julia-repl': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia-repl.min',
    },
    // Shims for libraries that are not AMD modules.
    shim: {
        // MathJax exposes a global instead of registering with RequireJS.
        'mathjax' : {
            exports: "MathJax"
        },
        // The Julia grammars must load after the core highlight.js library.
        'highlight-julia': ['highlight'],
        'highlight-julia-repl': ['highlight'],
    }
});
// Load MathJax
require(['mathjax'], function(MathJax) {
    // Recognise $...$ and \(...\) as inline math, and let authors write a
    // literal dollar sign by escaping it with a backslash.
    MathJax.Hub.Config({
        "tex2jax": {
            inlineMath: [['$','$'], ['\\(','\\)']],
            processEscapes: true
        }
    });
    // Pick the output renderer automatically (MML or HTML-CSS) and enable
    // the menu/zoom extensions plus the AMS math extensions.
    MathJax.Hub.Config({
        config: ["MMLorHTML.js"],
        jax: [
            "input/TeX",
            "output/HTML-CSS",
            "output/NativeMML"
        ],
        extensions: [
            "MathMenu.js",
            "MathZoom.js",
            "TeX/AMSmath.js",
            "TeX/AMSsymbols.js",
            "TeX/autobold.js",
            "TeX/autoload-all.js"
        ]
    });
    // Number displayed equations automatically, AMS style.
    MathJax.Hub.Config({
        TeX: { equationNumbers: { autoNumber: "AMS" } }
    });
})
// Initialise highlight.js (with the Julia grammars shimmed in after the core
// library) once the document is ready.
require(['jquery', 'highlight', 'highlight-julia', 'highlight-julia-repl'], function($, hljs) {
    $(document).ready(function() {
        hljs.initHighlighting();
    })
})
// update the version selector with info from the siteinfo.js and ../versions.js files
require(['jquery'], function($) {
    $(document).ready(function() {
        var version_selector = $("#version-selector");
        // add the current version to the selector based on siteinfo.js, but only if the selector is empty
        // (DOCUMENTER_CURRENT_VERSION is a global defined by siteinfo.js, when present)
        if (typeof DOCUMENTER_CURRENT_VERSION !== 'undefined' && $('#version-selector > option').length == 0) {
            var option = $("<option value='#' selected='selected'>" + DOCUMENTER_CURRENT_VERSION + "</option>");
            version_selector.append(option);
        }
        // DOC_VERSIONS is a global list of deployed versions, defined by ../versions.js
        if (typeof DOC_VERSIONS !== 'undefined') {
            var existing_versions = $('#version-selector > option');
            // NOTE(review): jQuery .map() returns a jQuery-wrapped array-like of
            // the option texts; $.inArray below accepts array-likes, so this works.
            var existing_versions_texts = existing_versions.map(function(i,x){return x.text});
            DOC_VERSIONS.forEach(function(each) {
                var version_url = documenterBaseURL + "/../" + each;
                var existing_id = $.inArray(each, existing_versions_texts);
                // if not already in the version selector, add it as a new option,
                // otherwise update the old option with the URL and enable it
                if (existing_id == -1) {
                    var option = $("<option value='" + version_url + "'>" + each + "</option>");
                    version_selector.append(option);
                } else {
                    var option = existing_versions[existing_id];
                    option.value = version_url;
                    option.disabled = false;
                }
            });
        }
        // only show the version selector if the selector has been populated
        // (it is rendered with visibility:hidden in the page template)
        if ($('#version-selector > option').length > 0) {
            version_selector.css("visibility", "visible");
        }
    })
})
// mobile
require(['jquery', 'headroom'], function($, Headroom) {
    $(document).ready(function() {
        var navtoc = $("nav.toc");
        // Tapping the current page's title in the TOC toggles the slide-out
        // menu (only visible as a slide-out in the mobile layout).
        $("nav.toc li.current a.toctext").click(function() {
            navtoc.toggleClass('show');
        });
        // The hamburger icon in the top bar opens/closes the TOC; when
        // opening, focus the TOC entry whose text matches the page title.
        $("article > header div#topbar a.fa-bars").click(function(ev) {
            ev.preventDefault();
            navtoc.toggleClass('show');
            if (navtoc.hasClass('show')) {
                var title = $("article > header div#topbar span").text();
                $("nav.toc ul li a:contains('" + title + "')").focus();
            }
        });
        // Clicking anywhere in the article (except the hamburger itself)
        // dismisses an open TOC.
        $("article#docs").bind('click', function(ev) {
            if ($(ev.target).is('div#topbar a.fa-bars')) {
                return;
            }
            if (navtoc.hasClass('show')) {
                navtoc.removeClass('show');
            }
        });
        // The top bar is display:block only inside the mobile media query;
        // when it is visible, let Headroom auto-hide it while scrolling.
        if ($("article > header div#topbar").css('display') == 'block') {
            var headroom = new Headroom(document.querySelector("article > header div#topbar"), {"tolerance": {"up": 10, "down": 10}});
            headroom.init();
        }
    })
})

View File

@ -0,0 +1,91 @@
/*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
// parseUri 1.2.2
// (c) Steven Levithan <stevenlevithan.com>
// MIT License
//
// Split a URI string into its named components (protocol, host, port, path,
// query, anchor, ...) and additionally parse the query string into a
// key/value map stored under `queryKey`.
function parseUri (str) {
    var opts = parseUri.options;
    var match = opts.parser[opts.strictMode ? "strict" : "loose"].exec(str);
    var uri = {};
    // Capture groups 0..13 correspond, in order, to the names in opts.key;
    // missing components become the empty string.
    for (var idx = 0; idx < 14; idx++) {
        uri[opts.key[idx]] = match[idx] || "";
    }
    // Break the query component (opts.key[12] === "query") into parameters.
    uri[opts.q.name] = {};
    uri[opts.key[12]].replace(opts.q.parser, function ($0, $1, $2) {
        if ($1) uri[opts.q.name][$1] = $2;
    });
    return uri;
}
parseUri.options = {
    strictMode: false,
    key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
    q: {
        name: "queryKey",
        parser: /(?:^|&)([^&=]*)=?([^&]*)/g
    },
    parser: {
        strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
        loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
    }
};
// Configure RequireJS paths for the search page dependencies: jQuery and the
// lunr.js full-text search engine, both loaded from CDNs.
requirejs.config({
    paths: {
        // The trailing '?' stops RequireJS from appending a second ".js"
        // suffix, since this URL already ends in .js.
        'jquery': 'https://code.jquery.com/jquery-3.1.0.js?',
        'lunr': 'https://cdnjs.cloudflare.com/ajax/libs/lunr.js/0.7.1/lunr.min',
    }
});
// Reference to the <script> tag that loaded this file. Unused below, but kept
// for interface compatibility in case other scripts read it.
var currentScript = document.currentScript;
require(["jquery", "lunr"], function($, lunr) {
    // Build the lunr index over the generated search entries
    // (documenterSearchIndex is a global defined by search_index.js),
    // keyed by page location, with titles boosted over body text.
    var index = lunr(function () {
        this.ref('location')
        this.field('title', {boost: 10})
        this.field('text')
    })
    // Keep the raw entries around so hits can be rendered from their data.
    var store = {}
    documenterSearchIndex['docs'].forEach(function(e) {
        index.add(e)
        store[e.location] = e
    })
    $(function(){
        // Run `query` against the index and render the hits into the
        // #search-results list, updating the hit count in #search-info.
        function update_search(query) {
            // FIX: these were implicit globals in the original code; declared
            // with `var` so the page's global namespace stays clean.
            var results = index.search(query)
            $('#search-info').text("Number of results: " + results.length)
            $('#search-results').empty()
            results.forEach(function(result) {
                var data = store[result.ref]
                var link = $('<a>')
                link.text(data.title)
                link.attr('href', documenterBaseURL+'/'+result.ref)
                var cat = $('<span class="category">('+data.category+')</span>')
                var li = $('<li>').append(link).append(cat)
                $('#search-results').append(li)
            })
        }
        // Re-run the search with whatever is currently in the search box.
        function update_search_box() {
            var query = $('#search-query').val()
            update_search(query)
        }
        $('#search-query').keyup(update_search_box)
        $('#search-query').change(update_search_box)
        // Pre-fill the search box from the ?q= URL parameter, if present.
        var search_query = parseUri(window.location).queryKey["q"]
        if(search_query !== undefined) {
            $("#search-query").val(search_query)
        }
        update_search_box();
    })
})

View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Contributing &amp; Help · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li class="current"><a class="toctext" href="contributing.html">Contributing &amp; Help</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a 
href="contributing.html">Contributing &amp; Help</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/contributing.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Contributing &amp; Help</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Contributing-and-Help-1" href="#Contributing-and-Help-1">Contributing &amp; Help</a></h1><p>If you need help, please ask on the <a href="https://discourse.julialang.org/">Julia forum</a>, the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning), or Flux&#39;s <a href="https://gitter.im/FluxML/Lobby">Gitter</a>.</p><p>Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by <a href="https://github.com/MikeInnes/Flux.jl">starring the repo</a>.</p><p>If you&#39;re interested in hacking on Flux, most of the <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src">code</a> is pretty straightforward. Adding new <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src/layers">layer definitions</a> or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.</p><p>If you get stuck or need anything, let us know!</p><footer><hr/><a class="previous" href="data/onehot.html"><span class="direction">Previous</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

View File

@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>One-Hot Encoding · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li class="current"><a class="toctext" href="onehot.html">One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Data Munging</li><li><a href="onehot.html">One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It&#39;s common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in &quot;one-of-k&quot; or <a href="https://en.wikipedia.org/wiki/One-hot">&quot;one-hot&quot;</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia&gt; using Flux: onehot
julia&gt; onehot(:b, [:a, :b, :c])
3-element Flux.OneHotVector:
false
true
false
julia&gt; onehot(:c, [:a, :b, :c])
3-element Flux.OneHotVector:
false
false
true</code></pre><p>The inverse is <code>argmax</code> (which can take a general probability distribution, as well as just booleans).</p><pre><code class="language-julia">julia&gt; argmax(ans, [:a, :b, :c])
:c
julia&gt; argmax([true, false, false], [:a, :b, :c])
:a
julia&gt; argmax([0.3, 0.2, 0.5], [:a, :b, :c])
:c</code></pre><h2><a class="nav-anchor" id="Batches-1" href="#Batches-1">Batches</a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>argmax</code> treats matrices as batches.</p><pre><code class="language-julia">julia&gt; using Flux: onehotbatch
julia&gt; onehotbatch([:b, :a, :b], [:a, :b, :c])
3×3 Flux.OneHotMatrix:
false true false
true false true
false false false
julia&gt; onecold(ans, [:a, :b, :c])
3-element Array{Symbol,1}:
:b
:a
:b</code></pre><p>Note that these operations returned <code>OneHotVector</code> and <code>OneHotMatrix</code> rather than <code>Array</code>s. <code>OneHotVector</code>s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.</p><footer><hr/><a class="previous" href="../training/training.html"><span class="direction">Previous</span><span class="title">Training</span></a><a class="next" href="../contributing.html"><span class="direction">Next</span><span class="title">Contributing &amp; Help</span></a></footer></article></body></html>

10
release-0.3/index.html Normal file
View File

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Home · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li class="current"><a class="toctext" href="index.html">Home</a><ul class="internal"><li class="toplevel"><a class="toctext" href="#Installation-1">Installation</a></li></ul></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="index.html">Home</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/index.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Home</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Flux:-The-Julia-Machine-Learning-Library-1" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the <a href="https://github.com/FluxML/CuArrays.jl">GPU kernels</a>) and any part can be tweaked to your liking.</p><h1><a class="nav-anchor" id="Installation-1" href="#Installation-1">Installation</a></h1><p>Install <a href="https://julialang.org/downloads/">Julia 0.6.0 or later</a>, if you haven&#39;t already.</p><pre><code class="language-julia">Pkg.add(&quot;Flux&quot;)
Pkg.test(&quot;Flux&quot;) # Check things installed correctly</code></pre><p>Start with the <a href="basics.html">basics</a>. The <a href="https://github.com/FluxML/model-zoo/">model zoo</a> is also a good starting point for many common kinds of models.</p><footer><hr/><a class="next" href="models/basics.html"><span class="direction">Next</span><span class="title">Basics</span></a></footer></article></body></html>

View File

@ -0,0 +1,78 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Basics · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href="basics.html">Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li></ul></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a 
class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="basics.html">Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>. (It&#39;s a good idea to follow this example in the Julia repl.)</p><pre><code class="language-julia">W = rand(2, 5)
b = rand(2)
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
x, y = rand(5), rand(2) # Dummy data
loss(x, y) # ~ 3</code></pre><p>To improve the prediction we can take the gradients of <code>W</code> and <code>b</code> with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that <code>W</code> and <code>b</code> are trainable <em>parameters</em>.</p><pre><code class="language-julia">using Flux.Tracker: param, back!, data, grad
W = param(W)
b = param(b)
l = loss(x, y)
back!(l)</code></pre><p><code>loss(x, y)</code> returns the same number, but it&#39;s now a <em>tracked</em> value that records gradients as it goes along. Calling <code>back!</code> then calculates the gradient of <code>W</code> and <code>b</code>. We can see what this gradient is, and modify <code>W</code> to train the model.</p><pre><code class="language-julia">grad(W)
W.data .-= 0.1grad(W)
loss(x, y) # ~ 2.5</code></pre><p>The loss has decreased a little, meaning that our prediction <code>x</code> is closer to the target <code>y</code>. If we have some data we can already try <a href="../training/training.html">training the model</a>.</p><p>All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can <em>look</em> very different — they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let&#39;s see what that looks like.</p><h2><a class="nav-anchor" id="Building-Layers-1" href="#Building-Layers-1">Building Layers</a></h2><p>It&#39;s common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> (<code>σ</code>) in between them. In the above style we could write this as:</p><pre><code class="language-julia">W1 = param(rand(3, 5))
b1 = param(rand(3))
layer1(x) = W1 * x .+ b1
W2 = param(rand(2, 3))
b2 = param(rand(2))
layer2(x) = W2 * x .+ b2
model(x) = layer2(σ.(layer1(x)))
model(rand(5)) # =&gt; 2-element vector</code></pre><p>This works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.</p><pre><code class="language-julia">function linear(in, out)
W = param(randn(out, in))
b = param(randn(out))
x -&gt; W * x .+ b
end
linear1 = linear(5, 3) # we can access linear1.W etc
linear2 = linear(3, 2)
model(x) = linear2(σ.(linear1(x)))
model(x) # =&gt; 2-element vector</code></pre><p>Another (equivalent) way is to create a struct that explicitly represents the affine layer.</p><pre><code class="language-julia">struct Affine
W
b
end
Affine(in::Integer, out::Integer) =
Affine(param(randn(out, in)), param(randn(out)))
# Overload call, so the object can be used as a function
(m::Affine)(x) = m.W * x .+ m.b
a = Affine(10, 5)
a(rand(10)) # =&gt; 5-element vector</code></pre><p>Congratulations! You just built the <code>Dense</code> layer that comes with Flux. Flux has many interesting layers available, but they&#39;re all things you could have built yourself very easily.</p><p>(There is one small difference with <code>Dense</code> — for convenience it also takes an activation function, like <code>Dense(10, 5, σ)</code>.)</p><h2><a class="nav-anchor" id="Stacking-It-Up-1" href="#Stacking-It-Up-1">Stacking It Up</a></h2><p>It&#39;s pretty common to write models that look something like:</p><pre><code class="language-julia">layer1 = Dense(10, 5, σ)
# ...
model(x) = layer3(layer2(layer1(x)))</code></pre><p>For long chains, it might be a bit more intuitive to have a list of layers, like this:</p><pre><code class="language-julia">using Flux
layers = [Dense(10, 5, σ), Dense(5, 2), softmax]
model(x) = foldl((x, m) -&gt; m(x), x, layers)
model(rand(10)) # =&gt; 2-element vector</code></pre><p>Handily, this is also provided for in Flux:</p><pre><code class="language-julia">model2 = Chain(
Dense(10, 5, σ),
Dense(5, 2),
softmax)
model2(rand(10)) # =&gt; 2-element vector</code></pre><p>This quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.</p><p>A nice property of this approach is that because &quot;models&quot; are just functions (possibly with trainable parameters), you can also see this as simple function composition.</p><pre><code class="language-julia">m = Dense(5, 2) ∘ Dense(10, 5, σ)
m(rand(10))</code></pre><p>Likewise, <code>Chain</code> will happily work with any Julia function.</p><pre><code class="language-julia">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) # =&gt; 26</code></pre><footer><hr/><a class="previous" href="../index.html"><span class="direction">Previous</span><span class="title">Home</span></a><a class="next" href="recurrence.html"><span class="direction">Next</span><span class="title">Recurrence</span></a></footer></article></body></html>

View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Layer Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li class="current"><a class="toctext" href="layers.html">Layer Reference</a><ul class="internal"><li><a class="toctext" href="#Model-Layers-1">Model Layers</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="layers.html">Layer Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Layer Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Model-Layers-1" href="#Model-Layers-1">Model Layers</a></h2><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in sequence on a given input.</p><pre><code class="language-none">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) == 26
m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L1-L16">source</a><br/></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The output <code>y</code> will be a vector or batch of length <code>out</code>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L40-L49">source</a><br/></section><footer><hr/><a class="previous" href="recurrence.html"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../training/optimisers.html"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>

View File

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Recurrence · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li class="current"><a class="toctext" href="recurrence.html">Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li><li><a class="toctext" href="#Truncating-Gradients-1">Truncating Gradients</a></li></ul></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" 
href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="recurrence.html">Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
y₂ = f(x₂)
y₃ = f(x₃)
# ...</code></pre><p>Recurrent networks introduce a <em>hidden state</em> that gets carried over each time we run the model. The model now takes the old <code>h</code> as an input, and produces a new <code>h</code> as output, each time we run it.</p><pre><code class="language-julia">h = # ... initial state ...
y₁, h = f(x₁, h)
y₂, h = f(x₂, h)
y₃, h = f(x₃, h)
# ...</code></pre><p>Information stored in <code>h</code> is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given <code>x</code> depends on all the inputs previously fed into the model.</p><p>(This might be important if, for example, each <code>x</code> represents one word of a sentence; the model&#39;s interpretation of the word &quot;bank&quot; should change if the previous input was &quot;river&quot; rather than &quot;investment&quot;.)</p><p>Flux&#39;s RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard <code>Dense</code> layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.</p><pre><code class="language-julia">Wxh = randn(5, 10)
Whh = randn(5, 5)
b = randn(5)
function rnn(h, x)
h = tanh.(Wxh * x .+ Whh * h .+ b)
return h, h
end
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = rnn(h, x)</code></pre><p>If you run the last line a few times, you&#39;ll notice the output <code>y</code> changing slightly even though the input <code>x</code> is the same.</p><p>We sometimes refer to functions like <code>rnn</code> above, which explicitly manage state, as recurrent <em>cells</em>. There are various recurrent cells available, which are documented in the <a href="layers.html">layer reference</a>. The hand-written example above can be replaced with:</p><pre><code class="language-julia">using Flux
m = Flux.RNNCell(10, 5)
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = m(h, x)</code></pre><h2><a class="nav-anchor" id="Stateful-Models-1" href="#Stateful-Models-1">Stateful Models</a></h2><p>For the most part, we don&#39;t want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the <code>Recur</code> wrapper to do this.</p><pre><code class="language-julia">x = rand(10)
h = rand(5)
m = Flux.Recur(rnn, h)
y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p><p>If you use the <code>RNN(10, 5)</code> constructor as opposed to <code>RNNCell</code> you&#39;ll see that it&#39;s simply a wrapped cell.</p><pre><code class="language-julia">julia&gt; RNN(10, 5)
Recur(RNNCell(Dense(15, 5)))</code></pre><h2><a class="nav-anchor" id="Sequences-1" href="#Sequences-1">Sequences</a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">map(m, seq) # returns a list of 5-element vectors</code></pre><p>To make this a bit more convenient, Flux has the <code>Seq</code> type. This is just a list, but tagged so that we know it&#39;s meant to be used as a sequence of data points.</p><pre><code class="language-julia">seq = Seq([rand(10) for i = 1:10])
m(seq) # returns a new Seq of length 10</code></pre><p>When we apply the model <code>m</code> to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.</p><p>You can get this behaviour more generally with the <code>Over</code> wrapper.</p><pre><code class="language-julia">m = Over(Dense(10,5))
m(seq) # returns a new Seq of length 10</code></pre><h2><a class="nav-anchor" id="Truncating-Gradients-1" href="#Truncating-Gradients-1">Truncating Gradients</a></h2><p>By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling <code>back!</code> will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients — this accumulates and quickly becomes expensive.</p><p>To avoid this we can <em>truncate</em> the gradient calculation, forgetting the history.</p><pre><code class="language-julia">truncate!(m)</code></pre><p>Calling <code>truncate!</code> wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation.</p><footer><hr/><a class="previous" href="basics.html"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="layers.html"><span class="direction">Next</span><span class="title">Layer Reference</span></a></footer></article></body></html>

9
release-0.3/search.html Normal file
View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Search · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; Help</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa 
fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="search_index.js"></script><script src="assets/search.js"></script></html>

235
release-0.3/search_index.js Normal file
View File

@ -0,0 +1,235 @@
var documenterSearchIndex = {"docs": [
{
"location": "index.html#",
"page": "Home",
"title": "Home",
"category": "page",
"text": ""
},
{
"location": "index.html#Flux:-The-Julia-Machine-Learning-Library-1",
"page": "Home",
"title": "Flux: The Julia Machine Learning Library",
"category": "section",
"text": "Flux is a library for machine learning. It comes \"batteries-included\" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the GPU kernels) and any part can be tweaked to your liking."
},
{
"location": "index.html#Installation-1",
"page": "Home",
"title": "Installation",
"category": "section",
"text": "Install Julia 0.6.0 or later, if you haven't already.Pkg.add(\"Flux\")\nPkg.test(\"Flux\") # Check things installed correctlyStart with the basics. The model zoo is also a good starting point for many common kinds of models."
},
{
"location": "models/basics.html#",
"page": "Basics",
"title": "Basics",
"category": "page",
"text": ""
},
{
"location": "models/basics.html#Model-Building-Basics-1",
"page": "Basics",
"title": "Model-Building Basics",
"category": "section",
"text": ""
},
{
"location": "models/basics.html#Taking-Gradients-1",
"page": "Basics",
"title": "Taking Gradients",
"category": "section",
"text": "Consider a simple linear regression, which tries to predict an output array y from an input x. (It's a good idea to follow this example in the Julia repl.)W = rand(2, 5)\nb = rand(2)\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nloss(x, y) # ~ 3To improve the prediction we can take the gradients of W and b with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that W and b are trainable parameters.using Flux.Tracker: param, back!, data, grad\n\nW = param(W)\nb = param(b)\n\nl = loss(x, y)\n\nback!(l)loss(x, y) returns the same number, but it's now a tracked value that records gradients as it goes along. Calling back! then calculates the gradient of W and b. We can see what this gradient is, and modify W to train the model.grad(W)\n\nW.data .-= 0.1grad(W)\n\nloss(x, y) # ~ 2.5The loss has decreased a little, meaning that our prediction x is closer to the target y. If we have some data we can already try training the model.All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can look very different they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let's see what that looks like."
},
{
"location": "models/basics.html#Building-Layers-1",
"page": "Basics",
"title": "Building Layers",
"category": "section",
"text": "It's common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like sigmoid (σ) in between them. In the above style we could write this as:W1 = param(rand(3, 5))\nb1 = param(rand(3))\nlayer1(x) = W1 * x .+ b1\n\nW2 = param(rand(2, 3))\nb2 = param(rand(2))\nlayer2(x) = W2 * x .+ b2\n\nmodel(x) = layer2(σ.(layer1(x)))\n\nmodel(rand(5)) # => 2-element vectorThis works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.function linear(in, out)\n W = param(randn(out, in))\n b = param(randn(out))\n x -> W * x .+ b\nend\n\nlinear1 = linear(5, 3) # we can access linear1.W etc\nlinear2 = linear(3, 2)\n\nmodel(x) = linear2(σ.(linear1(x)))\n\nmodel(x) # => 2-element vectorAnother (equivalent) way is to create a struct that explicitly represents the affine layer.struct Affine\n W\n b\nend\n\nAffine(in::Integer, out::Integer) =\n Affine(param(randn(out, in)), param(randn(out)))\n\n# Overload call, so the object can be used as a function\n(m::Affine)(x) = m.W * x .+ m.b\n\na = Affine(10, 5)\n\na(rand(10)) # => 5-element vectorCongratulations! You just built the Dense layer that comes with Flux. Flux has many interesting layers available, but they're all things you could have built yourself very easily.(There is one small difference with Dense for convenience it also takes an activation function, like Dense(10, 5, σ).)"
},
{
"location": "models/basics.html#Stacking-It-Up-1",
"page": "Basics",
"title": "Stacking It Up",
"category": "section",
"text": "It's pretty common to write models that look something like:layer1 = Dense(10, 5, σ)\n# ...\nmodel(x) = layer3(layer2(layer1(x)))For long chains, it might be a bit more intuitive to have a list of layers, like this:using Flux\n\nlayers = [Dense(10, 5, σ), Dense(5, 2), softmax]\n\nmodel(x) = foldl((x, m) -> m(x), x, layers)\n\nmodel(rand(10)) # => 2-element vectorHandily, this is also provided for in Flux:model2 = Chain(\n Dense(10, 5, σ),\n Dense(5, 2),\n softmax)\n\nmodel2(rand(10)) # => 2-element vectorThis quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.A nice property of this approach is that because \"models\" are just functions (possibly with trainable parameters), you can also see this as simple function composition.m = Dense(5, 2) ∘ Dense(10, 5, σ)\n\nm(rand(10))Likewise, Chain will happily work with any Julia function.m = Chain(x -> x^2, x -> x+1)\n\nm(5) # => 26"
},
{
"location": "models/recurrence.html#",
"page": "Recurrence",
"title": "Recurrence",
"category": "page",
"text": ""
},
{
"location": "models/recurrence.html#Recurrent-Models-1",
"page": "Recurrence",
"title": "Recurrent Models",
"category": "section",
"text": ""
},
{
"location": "models/recurrence.html#Recurrent-Cells-1",
"page": "Recurrence",
"title": "Recurrent Cells",
"category": "section",
"text": "In the simple feedforward case, our model m is a simple function from various inputs xᵢ to predictions yᵢ. (For example, each x might be an MNIST digit and each y a digit label.) Each prediction is completely independent of any others, and using the same x will always produce the same y.y₁ = f(x₁)\ny₂ = f(x₂)\ny₃ = f(x₃)\n# ...Recurrent networks introduce a hidden state that gets carried over each time we run the model. The model now takes the old h as an input, and produces a new h as output, each time we run it.h = # ... initial state ...\ny₁, h = f(x₁, h)\ny₂, h = f(x₂, h)\ny₃, h = f(x₃, h)\n# ...Information stored in h is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given x depends on all the inputs previously fed into the model.(This might be important if, for example, each x represents one word of a sentence; the model's interpretation of the word \"bank\" should change if the previous input was \"river\" rather than \"investment\".)Flux's RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard Dense layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.Wxh = randn(5, 10)\nWhh = randn(5, 5)\nb = randn(5)\n\nfunction rnn(h, x)\n h = tanh.(Wxh * x .+ Whh * h .+ b)\n return h, h\nend\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)If you run the last line a few times, you'll notice the output y changing slightly even though the input x is the same.We sometimes refer to functions like rnn above, which explicitly manage state, as recurrent cells. There are various recurrent cells available, which are documented in the layer reference. The hand-written example above can be replaced with:using Flux\n\nm = Flux.RNNCell(10, 5)\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)"
},
{
"location": "models/recurrence.html#Stateful-Models-1",
"page": "Recurrence",
"title": "Stateful Models",
"category": "section",
"text": "For the most part, we don't want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the Recur wrapper to do this.x = rand(10)\nh = rand(5)\n\nm = Flux.Recur(rnn, h)\n\ny = m(x)The Recur wrapper stores the state between runs in the m.state field.If you use the RNN(10, 5) constructor as opposed to RNNCell you'll see that it's simply a wrapped cell.julia> RNN(10, 5)\nRecur(RNNCell(Dense(15, 5)))"
},
{
"location": "models/recurrence.html#Sequences-1",
"page": "Recurrence",
"title": "Sequences",
"category": "section",
"text": "Often we want to work with sequences of inputs, rather than individual xs.seq = [rand(10) for i = 1:10]With Recur, applying our model to each element of a sequence is trivial:map(m, seq) # returns a list of 5-element vectorsTo make this a bit more convenient, Flux has the Seq type. This is just a list, but tagged so that we know it's meant to be used as a sequence of data points.seq = Seq([rand(10) for i = 1:10])\nm(seq) # returns a new Seq of length 10When we apply the model m to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.You can get this behaviour more generally with the Over wrapper.m = Over(Dense(10,5))\nm(seq) # returns a new Seq of length 10"
},
{
"location": "models/recurrence.html#Truncating-Gradients-1",
"page": "Recurrence",
"title": "Truncating Gradients",
"category": "section",
"text": "By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling back! will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients this accumulates and quickly becomes expensive.To avoid this we can truncate the gradient calculation, forgetting the history.truncate!(m)Calling truncate! wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation."
},
{
"location": "models/layers.html#",
"page": "Layer Reference",
"title": "Layer Reference",
"category": "page",
"text": ""
},
{
"location": "models/layers.html#Flux.Chain",
"page": "Layer Reference",
"title": "Flux.Chain",
"category": "Type",
"text": "Chain(layers...)\n\nChain multiple layers / functions together, so that they are called in sequence on a given input.\n\nm = Chain(x -> x^2, x -> x+1)\nm(5) == 26\n\nm = Chain(Dense(10, 5), Dense(5, 2))\nx = rand(10)\nm(x) == m[2](m[1](x))\n\nChain also supports indexing and slicing, e.g. m[2] or m[1:end-1]. m[1:3](x) will calculate the output of the first three layers.\n\n\n\n"
},
{
"location": "models/layers.html#Flux.Dense",
"page": "Layer Reference",
"title": "Flux.Dense",
"category": "Type",
"text": "Dense(in::Integer, out::Integer, σ = identity)\n\nCreates a traditional Dense layer with parameters W and b.\n\ny = σ.(W * x .+ b)\n\nThe input x must be a vector of length in, or a batch of vectors represented as an in × N matrix. The out y will be a vector or batch of length in.\n\n\n\n"
},
{
"location": "models/layers.html#Model-Layers-1",
"page": "Layer Reference",
"title": "Model Layers",
"category": "section",
"text": "Chain\nDense"
},
{
"location": "training/optimisers.html#",
"page": "Optimisers",
"title": "Optimisers",
"category": "page",
"text": ""
},
{
"location": "training/optimisers.html#Optimisers-1",
"page": "Optimisers",
"title": "Optimisers",
"category": "section",
"text": "Consider a simple linear regression. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters W and b.W = param(rand(2, 5))\nb = param(rand(2))\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nl = loss(x, y) # ~ 3\nback!(l)We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:using Flux.Tracker: data, grad\n\nfunction update()\n η = 0.1 # Learning Rate\n for p in (W, b)\n x, Δ = data(p), grad(p)\n x .-= η .* Δ # Apply the update\n Δ .= 0 # Clear the gradient\n end\nendIf we call update, the parameters W and b will change and our loss should go down.There are two pieces here: one is that we need a list of trainable parameters for the model ([W, b] in this case), and the other is the update step. In this case the update is simply gradient descent (x .-= η .* Δ), but we might choose to do something more advanced, like adding momentum.In this case, getting the variables is trivial, but you can imagine it'd be more of a pain with some complex stack of layers.m = Chain(\n Dense(10, 5, σ),\n Dense(5, 2), softmax)Instead of having to write [m[1].W, m[1].b, ...], Flux provides a params function params(m) that returns a list of all parameters in the model for you.For the update step, there's nothing whatsoever wrong with writing the loop above it'll work just fine but Flux provides various optimisers that make it more convenient.opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1\n\nopt()An optimiser takes a parameter list and returns a function that does the same thing as update above. We can pass either opt or update to our training loop, which will then run the optimiser after every mini-batch of data."
},
{
"location": "training/training.html#",
"page": "Training",
"title": "Training",
"category": "page",
"text": ""
},
{
"location": "training/training.html#Training-1",
"page": "Training",
"title": "Training",
"category": "section",
"text": "To actually train a model we need three things:A loss function, that evaluates how well a model is doing given some input data.\nA collection of data points that will be provided to the loss function.\nAn optimiser that will update the model parameters appropriately.With these we can call Flux.train!:Flux.train!(loss, data, opt)There are plenty of examples in the model zoo."
},
{
"location": "training/training.html#Loss-Functions-1",
"page": "Training",
"title": "Loss Functions",
"category": "section",
"text": "The loss that we defined in basics is completely valid for training. We can also define a loss in terms of some model:m = Chain(\n Dense(784, 32, σ),\n Dense(32, 10), softmax)\n\nloss(x, y) = Flux.mse(m(x), y)The loss will almost always be defined in terms of some cost function that measures the distance of the prediction m(x) from the target y. Flux has several of these built in, like mse for mean squared error or logloss for cross entropy loss, but you can calculate it however you want."
},
{
"location": "training/training.html#Callbacks-1",
"page": "Training",
"title": "Callbacks",
"category": "section",
"text": "train! takes an additional argument, cb, that's used for callbacks so that you can observe the training process. For example:train!(loss, data, opt, cb = () -> println(\"training\"))Callbacks are called for every batch of training data. You can slow this down using Flux.throttle(f, timeout) which prevents f from being called more than once every timeout seconds.A more typical callback might look like this:test_x, test_y = # ... create single batch of test data ...\nevalcb() = @show(loss(test_x, test_y))\n\nFlux.train!(loss, data, opt,\n cb = throttle(evalcb, 5))"
},
{
"location": "data/onehot.html#",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "page",
"text": ""
},
{
"location": "data/onehot.html#One-Hot-Encoding-1",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "section",
"text": "It's common to encode categorical variables (like true, false or cat, dog) in \"one-of-k\" or \"one-hot\" form. Flux provides the onehot function to make this easy.julia> using Flux: onehot\n\njulia> onehot(:b, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n true\n false\n\njulia> onehot(:c, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n false\n trueThe inverse is argmax (which can take a general probability distribution, as well as just booleans).julia> argmax(ans, [:a, :b, :c])\n:c\n\njulia> argmax([true, false, false], [:a, :b, :c])\n:a\n\njulia> argmax([0.3, 0.2, 0.5], [:a, :b, :c])\n:c"
},
{
"location": "data/onehot.html#Batches-1",
"page": "One-Hot Encoding",
"title": "Batches",
"category": "section",
"text": "onehotbatch creates a batch (matrix) of one-hot vectors, and argmax treats matrices as batches.julia> using Flux: onehotbatch\n\njulia> onehotbatch([:b, :a, :b], [:a, :b, :c])\n3×3 Flux.OneHotMatrix:\n false true false\n true false true\n false false false\n\njulia> onecold(ans, [:a, :b, :c])\n3-element Array{Symbol,1}:\n :b\n :a\n :bNote that these operations returned OneHotVector and OneHotMatrix rather than Arrays. OneHotVectors behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly.. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood."
},
{
"location": "contributing.html#",
"page": "Contributing & Help",
"title": "Contributing & Help",
"category": "page",
"text": ""
},
{
"location": "contributing.html#Contributing-and-Help-1",
"page": "Contributing & Help",
"title": "Contributing & Help",
"category": "section",
"text": "If you need help, please ask on the Julia forum, the slack (channel #machine-learning), or Flux's Gitter.Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by starring the repo.If you're interested in hacking on Flux, most of the code is pretty straightforward. Adding new layer definitions or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.If you get stuck or need anything, let us know!"
},
]}

1
release-0.3/siteinfo.js Normal file
View File

@ -0,0 +1 @@
var DOCUMENTER_CURRENT_VERSION = "release-0.3";

View File

@ -0,0 +1,30 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Optimisers · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href="optimisers.html">Optimisers</a><ul class="internal"></ul></li><li><a class="toctext" href="training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training 
Models</li><li><a href="optimisers.html">Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../models/basics.html">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">W = param(rand(2, 5))
b = param(rand(2))
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
x, y = rand(5), rand(2) # Dummy data
l = loss(x, y) # ~ 3
back!(l)</code></pre><p>We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here&#39;s one way to do that:</p><pre><code class="language-julia">using Flux.Tracker: data, grad
function update()
η = 0.1 # Learning Rate
for p in (W, b)
x, Δ = data(p), grad(p)
x .-= η .* Δ # Apply the update
Δ .= 0 # Clear the gradient
end
end</code></pre><p>If we call <code>update</code>, the parameters <code>W</code> and <code>b</code> will change and our loss should go down.</p><p>There are two pieces here: one is that we need a list of trainable parameters for the model (<code>[W, b]</code> in this case), and the other is the update step. In this case the update is simply gradient descent (<code>x .-= η .* Δ</code>), but we might choose to do something more advanced, like adding momentum.</p><p>In this case, getting the variables is trivial, but you can imagine it&#39;d be more of a pain with some complex stack of layers.</p><pre><code class="language-julia">m = Chain(
Dense(10, 5, σ),
Dense(5, 2), softmax)</code></pre><p>Instead of having to write <code>[m[1].W, m[1].b, ...]</code>, Flux provides a params function <code>params(m)</code> that returns a list of all parameters in the model for you.</p><p>For the update step, there&#39;s nothing whatsoever wrong with writing the loop above it&#39;ll work just fine but Flux provides various <em>optimisers</em> that make it more convenient.</p><pre><code class="language-julia">opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1
opt()</code></pre><p>An optimiser takes a parameter list and returns a function that does the same thing as <code>update</code> above. We can pass either <code>opt</code> or <code>update</code> to our <a href="training/training.html">training loop</a>, which will then run the optimiser after every mini-batch of data.</p><footer><hr/><a class="previous" href="../models/layers.html"><span class="direction">Previous</span><span class="title">Layer Reference</span></a><a class="next" href="training.html"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>

View File

@ -0,0 +1,17 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Training · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="optimisers.html">Optimisers</a></li><li class="current"><a class="toctext" href="training.html">Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a 
class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="training.html">Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need three things:</p><ul><li><p>A <em>loss function</em>, that evaluates how well a model is doing given some input data.</p></li><li><p>A collection of data points that will be provided to the loss function.</p></li><li><p>An <a href="optimisers.html">optimiser</a> that will update the model parameters appropriately.</p></li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(loss, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" href="#Loss-Functions-1">Loss Functions</a></h2><p>The <code>loss</code> that we defined in <a href="../models/basics.html">basics</a> is completely valid for training. We can also define a loss in terms of some model:</p><pre><code class="language-julia">m = Chain(
Dense(784, 32, σ),
Dense(32, 10), softmax)
loss(x, y) = Flux.mse(m(x), y)</code></pre><p>The loss will almost always be defined in terms of some <em>cost function</em> that measures the distance of the prediction <code>m(x)</code> from the target <code>y</code>. Flux has several of these built in, like <code>mse</code> for mean squared error or <code>logloss</code> for cross entropy loss, but you can calculate it however you want.</p><h2><a class="nav-anchor" id="Callbacks-1" href="#Callbacks-1">Callbacks</a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that&#39;s used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(loss, data, opt, cb = () -&gt; println(&quot;training&quot;))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
evalcb() = @show(loss(test_x, test_y))
Flux.train!(loss, data, opt,
cb = throttle(evalcb, 5))</code></pre><footer><hr/><a class="previous" href="optimisers.html"><span class="direction">Previous</span><span class="title">Optimisers</span></a><a class="next" href="../data/onehot.html"><span class="direction">Next</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

View File

@ -1,253 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Backends · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="../models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="../models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="../models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="../models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="batching.html">
Batching
</a>
</li>
<li class="current">
<a class="toctext" href="backends.html">
Backends
</a>
<ul class="internal">
<li>
<a class="toctext" href="#Basic-Usage-1">
Basic Usage
</a>
</li>
<li>
<a class="toctext" href="#Native-Integration-1">
Native Integration
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Other APIs
</li>
<li>
<a href="backends.html">
Backends
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/apis/backends.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Backends-1" href="#Backends-1">
Backends
</a>
</h1>
<h2>
<a class="nav-anchor" id="Basic-Usage-1" href="#Basic-Usage-1">
Basic Usage
</a>
</h2>
<pre><code class="language-julia">model = Chain(Affine(10, 20), σ, Affine(20, 15), softmax)
xs = rand(10)</code></pre>
<p>
Currently, Flux&#39;s pure-Julia backend has no optimisations. This means that calling
</p>
<pre><code class="language-julia">model(rand(10)) #&gt; [0.0650, 0.0655, ...]</code></pre>
<p>
directly won&#39;t have great performance. In order to run a computationally intensive training process, we need to use a backend like MXNet or TensorFlow.
</p>
<p>
This is easy to do. Just call either
<code>mxnet</code>
or
<code>tf</code>
on a model to convert it to a model of that kind:
</p>
<pre><code class="language-julia">mxmodel = mxnet(model)
mxmodel(xs) #&gt; [0.0650, 0.0655, ...]
# or
tfmodel = tf(model)
tfmodel(xs) #&gt; [0.0650, 0.0655, ...]</code></pre>
<p>
These new models look and feel exactly like every other model in Flux, including returning the same result when you call them, and can be trained as usual using
<code>Flux.train!()</code>
. The difference is that the computation is being carried out by a backend, which will usually give a large speedup.
</p>
<h2>
<a class="nav-anchor" id="Native-Integration-1" href="#Native-Integration-1">
Native Integration
</a>
</h2>
<p>
Flux aims to provide high-level APIs that work well across backends, but in some cases you may want to take advantage of features specific to a given backend. In these cases it&#39;s easy to &quot;drop down&quot; and use the backend&#39;s API directly, where appropriate. For example:
</p>
<pre><code class="language-julia">using MXNet
Flux.loadmx()
mxmodel = mx.FeedForward(model)</code></pre>
<p>
This returns a standard
<code>mx.FeedForward</code>
instance, just like you might have created using MXNet&#39;s usual API. You can then use this with MXNet&#39;s data provider implementation, custom optimisers, or distributed training processes.
</p>
<p>
Same goes for TensorFlow, where it&#39;s easy to create a
<code>Tensor</code>
object:
</p>
<pre><code class="language-julia">using TensorFlow
Flux.loadtf()
x = placeholder(Float32)
y = Tensor(model, x)</code></pre>
<p>
This makes makes it easy to take advantage of Flux&#39;s model description and debugging tools while also getting the benefit of the work put into these backends. You can check out how this looks with the integration examples
<a href="https://github.com/MikeInnes/Flux.jl/tree/master/examples">
here
</a>
.
</p>
<footer>
<hr/>
<a class="previous" href="batching.html">
<span class="direction">
Previous
</span>
<span class="title">
Batching
</span>
</a>
<a class="next" href="storage.html">
<span class="direction">
Next
</span>
<span class="title">
Storing Models
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,392 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Batching · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="../models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="../models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="../models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="../models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li class="current">
<a class="toctext" href="batching.html">
Batching
</a>
<ul class="internal">
<li>
<a class="toctext" href="#Basics-1">
Basics
</a>
</li>
<li>
<a class="toctext" href="#Sequences-and-Nesting-1">
Sequences and Nesting
</a>
</li>
<li>
<a class="toctext" href="#Future-Work-1">
Future Work
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Other APIs
</li>
<li>
<a href="batching.html">
Batching
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/apis/batching.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Batching-1" href="#Batching-1">
Batching
</a>
</h1>
<h2>
<a class="nav-anchor" id="Basics-1" href="#Basics-1">
Basics
</a>
</h2>
<p>
Existing machine learning frameworks and libraries represent batching, and other properties of data, only implicitly. Your machine learning data is a large
<code>N</code>
-dimensional array, which may have a shape like:
</p>
<pre><code class="language-julia">100 × 50 × 256 × 256</code></pre>
<p>
Typically, this might represent that you have (say) a batch of 100 samples, where each sample is a 50-long sequence of 256×256 images. This is great for performance, but array operations often become much more cumbersome as a result. Especially if you manipulate dimensions at runtime as an optimisation, debugging models can become extremely fiddly, with a proliferation of
<code>X × Y × Z</code>
arrays and no information about where they came from.
</p>
<p>
Flux introduces a new approach where the batch dimension is represented explicitly as part of the data. For example:
</p>
<pre><code class="language-julia">julia&gt; xs = Batch([[1,2,3], [4,5,6]])
2-element Batch of Vector{Int64}:
[1,2,3]
[4,5,6]</code></pre>
<p>
Batches are represented the way we
<em>
think
</em>
about them; as a list of data points. We can do all the usual array operations with them, including getting the first with
<code>xs[1]</code>
, iterating over them and so on. The trick is that under the hood, the data is batched into a single array:
</p>
<pre><code class="language-julia">julia&gt; rawbatch(xs)
2×3 Array{Int64,2}:
1 2 3
4 5 6</code></pre>
<p>
When we put a
<code>Batch</code>
object into a model, the model is ultimately working with a single array, which means there&#39;s no performance overhead and we get the full benefit of standard batching.
</p>
<p>
Turning a set of vectors into a matrix is fairly easy anyway, so what&#39;s the big deal? Well, it gets more interesting as we start working with more complex data. Say we were working with 4×4 images:
</p>
<pre><code class="language-julia">julia&gt; xs = Batch([[1 2; 3 4], [5 6; 7 8]])
2-element Flux.Batch of Array{Int64,2}:
[1 2; 3 4]
[5 6; 7 8]</code></pre>
<p>
The raw batch array is much messier, and harder to recognise:
</p>
<pre><code class="language-julia">julia&gt; rawbatch(xs)
2×2×2 Array{Int64,3}:
[:, :, 1] =
1 3
5 7
[:, :, 2] =
2 4
6 8</code></pre>
<p>
Furthermore, because the batches acts like a list of arrays, we can use simple and familiar operations on it:
</p>
<pre><code class="language-julia">julia&gt; map(flatten, xs)
2-element Array{Array{Int64,1},1}:
[1,3,2,4]
[5,7,6,8]</code></pre>
<p>
<code>flatten</code>
is simple enough over a single data point, but flattening a batched data set is more complex and you end up needing arcane array operations like
<code>mapslices</code>
. A
<code>Batch</code>
can just handle this for you for free, and more importantly it ensures that your operations are
<em>
correct
</em>
that you haven&#39;t mixed up your batch and data dimensions, or used the wrong array op, and so on.
</p>
<h2>
<a class="nav-anchor" id="Sequences-and-Nesting-1" href="#Sequences-and-Nesting-1">
Sequences and Nesting
</a>
</h2>
<p>
As well as
<code>Batch</code>
, there&#39;s a structure called
<code>Seq</code>
which behaves very similarly. Let&#39;s say we have two one-hot encoded DNA sequences:
</p>
<pre><code class="language-julia">julia&gt; x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]]) # [A, T, C, G]
julia&gt; x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]])
julia&gt; rawbatch(x1)
3×4 Array{Int64,2}:
0 1 0 0
1 0 0 0
0 0 0 1</code></pre>
<p>
This is identical to
<code>Batch</code>
so far; but where it gets interesting is that you can actually nest these types:
</p>
<pre><code class="language-julia">julia&gt; xs = Batch([x1, x2])
2-element Batch of Seq of Vector{Int64}:
[[0,1,0,0],[1,0,0,0],[0,0,0,1]]
[[0,0,1,0],[0,0,0,1],[0,0,1,0]]</code></pre>
<p>
Again, this represents itself intuitively as a list-of-lists-of-lists, but
<code>rawbatch</code>
shows that the real underlying value is an
<code>Array{Int64,3}</code>
of shape
<code>2×3×4</code>
.
</p>
<h2>
<a class="nav-anchor" id="Future-Work-1" href="#Future-Work-1">
Future Work
</a>
</h2>
<p>
The design of batching is still a fairly early work in progress, though it&#39;s used in a few places in the system. For example, all Flux models expect to be given
<code>Batch</code>
objects which are unwrapped into raw arrays for the computation. Models will convert their arguments if necessary, so it&#39;s convenient to call a model with a single data point like
<code>f([1,2,3])</code>
.
</p>
<p>
Right now, the
<code>Batch</code>
or
<code>Seq</code>
types always stack along the left-most dimension. In future, this will be customisable, and Flux will provide implementations of common functions that are generic across the batch dimension. This brings the following benefits:
</p>
<ul>
<li>
<p>
Code can be written in a batch-agnostic way or be generic across batching strategies.
</p>
</li>
<li>
<p>
Batching and optimisations, like switching batch dimensions, can be expressed by the programmer with compiler support; fewer code changes are required and optimisations are guaranteed not to break the model.
</p>
</li>
<li>
<p>
This also opens the door for more automatic optimisations, e.g. having the compiler explore the search base of possible batching combinations.
</p>
</li>
</ul>
<p>
Here&#39;s a more detailed illustration of how it might look for code to be &quot;generic across batching&quot;. Take for example a weight matrix
<code>W</code>
times a vector
<code>x</code>
, as used in a logistic regression or a simple neural network:
</p>
<pre><code class="language-julia"> W * x =&gt; y
(10×28) * (28) =&gt; (10)</code></pre>
<p>
If we want to work with a batch of 50
<code>x</code>
s, one option is to stack the data into a matrix of size
<code>28 × 50</code>
.
</p>
<pre><code class="language-julia"> W * x =&gt; y
(10×28) * (28×50) =&gt; (10×50)</code></pre>
<p>
This works, but we may find that it&#39;s slow or doesn&#39;t fit well with the rest of the model, which batches on the first dimension. For that reason we may instead want to put the data in a
<code>50 × 28</code>
matrix and alter the code as follows:
</p>
<pre><code class="language-julia"> x * W&#39; =&gt; y
(50×28) * (28×10) =&gt; (50×10)</code></pre>
<p>
to make the shapes work out. This code change is not ideal; in more complex cases it can become fiddly and error-prone, and it means that the code is less reusable, tied to a particular implementation strategy.
</p>
<p>
There&#39;s an alternative. We keep the same code, but represent the batched
<code>x</code>
s as either a
<code>Batch{Vector,1}</code>
or a
<code>Batch{Vector,2}</code>
, depending on how the data is stacked. Then we can simply overload
<code>*</code>
as follows:
</p>
<pre><code class="language-julia">*(W::Matrix, x::Batch{Vector,1}) = x * W&#39;
*(W::Matrix, x::Batch{Vector,2}) = W * x</code></pre>
<p>
This means that we can always write
<code>W*x</code>
, and the code is reusable in a larger network regardless of the overall batching approach. Moreover, Julia&#39;s type system ensures there&#39;s no runtime cost to doing this, and we can compile the code appropriately for backends like TensorFlow as well.
</p>
<footer>
<hr/>
<a class="previous" href="../models/debugging.html">
<span class="direction">
Previous
</span>
<span class="title">
Debugging
</span>
</a>
<a class="next" href="backends.html">
<span class="direction">
Next
</span>
<span class="title">
Backends
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,207 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Storing Models · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="../models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="../models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="../models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="../models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="backends.html">
Backends
</a>
</li>
<li class="current">
<a class="toctext" href="storage.html">
Storing Models
</a>
<ul class="internal"></ul>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Other APIs
</li>
<li>
<a href="storage.html">
Storing Models
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/apis/storage.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Loading-and-Saving-Models-1" href="#Loading-and-Saving-Models-1">
Loading and Saving Models
</a>
</h1>
<pre><code class="language-julia">model = Chain(Affine(10, 20), σ, Affine(20, 15), softmax)</code></pre>
<p>
Since models are just simple Julia data structures, it&#39;s very easy to save and load them using any of Julia&#39;s existing serialisation formats. For example, using Julia&#39;s built-in
<code>serialize</code>
:
</p>
<pre><code class="language-julia">open(io -&gt; serialize(io, model), &quot;model.jls&quot;, &quot;w&quot;)
open(io -&gt; deserialize(io), &quot;model.jls&quot;)</code></pre>
<p>
One issue with
<code>serialize</code>
is that it doesn&#39;t promise compatibility between major Julia versions. For longer-term storage it&#39;s good to use a package like
<a href="https://github.com/JuliaIO/JLD.jl">
JLD
</a>
.
</p>
<pre><code class="language-julia">using JLD
@save &quot;model.jld&quot; model
@load &quot;model.jld&quot;</code></pre>
<p>
However, JLD will break for some models as functions are not supported on 0.5+. You can resolve that by checking out
<a href="https://github.com/JuliaIO/JLD.jl/pull/137">
this branch
</a>
.
</p>
<p>
Right now this is the only storage format Flux supports. In future Flux will support loading and saving other model formats (on an as-needed basis).
</p>
<footer>
<hr/>
<a class="previous" href="backends.html">
<span class="direction">
Previous
</span>
<span class="title">
Backends
</span>
</a>
<a class="next" href="../examples/logreg.html">
<span class="direction">
Next
</span>
<span class="title">
Simple MNIST
</span>
</a>
</footer>
</article>
</body>
</html>

63
stable/assets/arrow.svg Normal file
View File

@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="16.5mm"
height="8.6603003mm"
viewBox="0 0 58.464567 30.686103"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
sodipodi:docname="arrow.svg">
<defs
id="defs4" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="11.2"
inkscape:cx="14.209234"
inkscape:cy="29.780479"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1053"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-1021.6761)">
<path
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 0,1021.6761 35.433071,0 -17.716536,30.6861 z"
id="path4140"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccc" />
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

@ -21,7 +21,12 @@ body, input {
}
pre, code {
font-family: 'Ubuntu Mono', Monaco, courier, monospace;
font-family: 'Roboto Mono', Monaco, courier, monospace;
font-size: 0.90em;
}
pre code {
font-size: 1em;
}
a {
@ -144,6 +149,42 @@ nav.toc .logo {
nav.toc h1 {
text-align: center;
margin-top: .57em;
margin-bottom: 0;
}
nav.toc select {
display: block;
height: 2em;
padding: 0 1.6em 0 1em;
min-width: 7em;
max-width: 90%;
max-width: calc(100% - 5em);
margin: 0 auto;
font-size: .83em;
border: 1px solid #c9c9c9;
border-radius: 1em;
/* TODO: doesn't seem to be centered on Safari */
text-align: center;
text-align-last: center;
appearance: none;
-moz-appearance: none;
-webkit-appearance: none;
background: white url("arrow.svg");
background-size: 1.155em;
background-repeat: no-repeat;
background-position: right;
}
nav.toc select:hover {
border: 1px solid #a0a0a0;
}
nav.toc select option {
text-align: center;
}
nav.toc input {
@ -151,20 +192,11 @@ nav.toc input {
height: 2em;
width: 90%;
width: calc(100% - 5em);
margin: 0 auto;
margin: 1.2em auto;
padding: 0 1em;
border: 1px solid #c9c9c9;
border-radius: 1em;
font-size: smaller;
}
nav.toc select {
display: block;
height: 2em;
width: calc(100% - 3em);
margin: 5px auto;
font-size: smaller;
text-align: center;
font-size: .83em;
}
nav.toc > ul * {
@ -172,7 +204,7 @@ nav.toc > ul * {
}
nav.toc ul {
color: #b3b3b3;
color: #404040;
padding: 0;
list-style: none;
}
@ -183,6 +215,7 @@ nav.toc ul .toctext {
}
nav.toc ul a:hover {
color: #fcfcfc;
background-color: #4e4a4a;
}
@ -196,7 +229,6 @@ nav.toc ul.internal a:hover {
}
nav.toc ul.internal {
color: gray;
background-color: #e3e3e3;
box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
list-style: none;
@ -246,6 +278,10 @@ article {
article > header {}
article > header div#topbar {
display: none;
}
article > header nav ul {
display: inline-block;
list-style: none;
@ -401,3 +437,105 @@ article section.docstring a.source-link {
background-color: transparent;
padding: 0;
}
@media only screen and (max-width: 768px) {
nav.toc {
position: fixed;
overflow-y: scroll;
width: 16em;
left: -16em;
-webkit-overflow-scrolling: touch;
-webkit-transition-property: left; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: left;
transition-duration: 0.3s;
-webkit-transition-timing-function: ease-out; /* Safari */
transition-timing-function: ease-out;
z-index: 2;
}
nav.toc.show {
left: 0;
}
article {
margin-left: 0;
padding: 3em 0.9em 0 0.9em; /* top right bottom left */
overflow-wrap: break-word;
}
article > header {
position: fixed;
left: 0;
z-index: 1;
}
article > header nav, hr {
display: none;
}
article > header div#topbar {
display: block; /* is mobile */
position: fixed;
width: 100%;
height: 1.5em;
padding-top: 1em;
padding-bottom: 1em;
background-color: #fcfcfc;
box-shadow: 0 1px 3px rgba(0,0,0,.26);
top: 0;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: top;
transition-duration: 0.3s;
}
article > header div#topbar.headroom--unpinned.headroom--not-top.headroom--not-bottom {
top: -4em;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.7s; /* Safari */
transition-property: top;
transition-duration: 0.7s;
}
article > header div#topbar span {
position: fixed;
width: 80%;
height: 1.5em;
margin-top: -0.1em;
margin-left: 0.9em;
font-size: 1.2em;
overflow: hidden;
}
article > header div#topbar a.fa-bars {
float: right;
padding: 0.6em;
margin-top: -0.6em;
margin-right: 0.3em;
font-size: 1.5em;
}
article > header div#topbar a.fa-bars:visited {
color: #3091d1;
}
article table {
overflow-x: auto;
display: block;
}
article div.MathJax_Display {
overflow: scroll;
}
article span.MathJax {
overflow: hidden;
}
}
@media only screen and (max-width: 320px) {
body {
font-size: 15px;
}
}

View File

@ -7,17 +7,20 @@
requirejs.config({
paths: {
'jquery': 'https://code.jquery.com/jquery-3.1.0.js?',
'jquery': 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min',
'jqueryui': 'https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.12.0/jquery-ui.min',
'mathjax': 'https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML',
'highlight': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/highlight.min',
'highlight-julia': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/languages/julia.min',
'headroom': 'https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.3/headroom.min',
'mathjax': 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML',
'highlight': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min',
'highlight-julia': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia.min',
'highlight-julia-repl': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia-repl.min',
},
shim: {
'mathjax' : {
exports: "MathJax"
},
'highlight-julia': ['highlight']
'highlight-julia': ['highlight'],
'highlight-julia-repl': ['highlight'],
}
});
@ -50,16 +53,77 @@ require(['mathjax'], function(MathJax) {
});
})
require(['jquery', 'highlight', 'highlight-julia'], function($, hljs) {
require(['jquery', 'highlight', 'highlight-julia', 'highlight-julia-repl'], function($, hljs) {
$(document).ready(function() {
if (typeof DOC_VERSIONS !== 'undefined') {
var version_selector = $("#version-selector");
DOC_VERSIONS.forEach(function(each) {
var option = $("<option value='" + documenterBaseURL + "/../" + each + "'>" + each + "</option>");
version_selector.append(option);
});
}
hljs.initHighlighting();
})
})
// update the version selector with info from the siteinfo.js and ../versions.js files
require(['jquery'], function($) {
$(document).ready(function() {
var version_selector = $("#version-selector");
// add the current version to the selector based on siteinfo.js, but only if the selector is empty
if (typeof DOCUMENTER_CURRENT_VERSION !== 'undefined' && $('#version-selector > option').length == 0) {
var option = $("<option value='#' selected='selected'>" + DOCUMENTER_CURRENT_VERSION + "</option>");
version_selector.append(option);
}
if (typeof DOC_VERSIONS !== 'undefined') {
var existing_versions = $('#version-selector > option');
var existing_versions_texts = existing_versions.map(function(i,x){return x.text});
DOC_VERSIONS.forEach(function(each) {
var version_url = documenterBaseURL + "/../" + each;
var existing_id = $.inArray(each, existing_versions_texts);
// if not already in the version selector, add it as a new option,
// otherwise update the old option with the URL and enable it
if (existing_id == -1) {
var option = $("<option value='" + version_url + "'>" + each + "</option>");
version_selector.append(option);
} else {
var option = existing_versions[existing_id];
option.value = version_url;
option.disabled = false;
}
});
}
// only show the version selector if the selector has been populated
if ($('#version-selector > option').length > 0) {
version_selector.css("visibility", "visible");
}
})
})
// mobile
require(['jquery', 'headroom'], function($, Headroom) {
$(document).ready(function() {
var navtoc = $("nav.toc");
$("nav.toc li.current a.toctext").click(function() {
navtoc.toggleClass('show');
});
$("article > header div#topbar a.fa-bars").click(function(ev) {
ev.preventDefault();
navtoc.toggleClass('show');
if (navtoc.hasClass('show')) {
var title = $("article > header div#topbar span").text();
$("nav.toc ul li a:contains('" + title + "')").focus();
}
});
$("article#docs").bind('click', function(ev) {
if ($(ev.target).is('div#topbar a.fa-bars')) {
return;
}
if (navtoc.hasClass('show')) {
navtoc.removeClass('show');
}
});
if ($("article > header div#topbar").css('display') == 'block') {
var headroom = new Headroom(document.querySelector("article > header div#topbar"), {"tolerance": {"up": 10, "down": 10}});
headroom.init();
}
})
})

View File

@ -1,212 +1,9 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Contributing &amp; Help · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Contributing &amp; Help · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL="."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script>
<script src="../versions.js"></script>
<link href="../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li class="current">
<a class="toctext" href="contributing.html">
Contributing &amp; Help
</a>
<ul class="internal"></ul>
</li>
<li>
<a class="toctext" href="internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
<a href="contributing.html">
Contributing &amp; Help
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/contributing.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Contributing-1" href="#Contributing-1">
Contributing
</a>
</h1>
<p>
If you need help, please ask on the
<a href="https://discourse.julialang.org/">
Julia forum
</a>
or on Flux&#39;s
<a href="https://gitter.im/MikeInnes/Flux.jl">
Gitter
</a>
.
</p>
<p>
Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by
<a href="https://github.com/MikeInnes/Flux.jl">
starring the repo
</a>
.
</p>
<p>
If you&#39;re interested in hacking on Flux, most of the
<a href="https://github.com/MikeInnes/Flux.jl/tree/master/src">
code
</a>
is pretty straightforward. Adding new
<a href="https://github.com/MikeInnes/Flux.jl/tree/master/src/layers">
layer definitions
</a>
or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code. The
<code>compiler</code>
directory is a bit more involved and is documented in
<a href="internals.html">
internals
</a>
, but most changes won&#39;t need to touch that.
</p>
<p>
If you get stuck or need anything, let us know!
</p>
<footer>
<hr/>
<a class="previous" href="examples/char-rnn.html">
<span class="direction">
Previous
</span>
<span class="title">
Char RNN
</span>
</a>
<a class="next" href="internals.html">
<span class="direction">
Next
</span>
<span class="title">
Internals
</span>
</a>
</footer>
</article>
</body>
</html>
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li class="current"><a class="toctext" href="contributing.html">Contributing &amp; Help</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a 
href="contributing.html">Contributing &amp; Help</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/contributing.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Contributing &amp; Help</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Contributing-and-Help-1" href="#Contributing-and-Help-1">Contributing &amp; Help</a></h1><p>If you need help, please ask on the <a href="https://discourse.julialang.org/">Julia forum</a>, the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning), or Flux&#39;s <a href="https://gitter.im/FluxML/Lobby">Gitter</a>.</p><p>Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by <a href="https://github.com/MikeInnes/Flux.jl">starring the repo</a>.</p><p>If you&#39;re interested in hacking on Flux, most of the <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src">code</a> is pretty straightforward. Adding new <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src/layers">layer definitions</a> or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.</p><p>If you get stuck or need anything, let us know!</p><footer><hr/><a class="previous" href="data/onehot.html"><span class="direction">Previous</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

40
stable/data/onehot.html Normal file
View File

@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>One-Hot Encoding · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li class="current"><a class="toctext" href="onehot.html">One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Data Munging</li><li><a href="onehot.html">One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It&#39;s common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in &quot;one-of-k&quot; or <a href="https://en.wikipedia.org/wiki/One-hot">&quot;one-hot&quot;</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia&gt; using Flux: onehot
julia&gt; onehot(:b, [:a, :b, :c])
3-element Flux.OneHotVector:
false
true
false
julia&gt; onehot(:c, [:a, :b, :c])
3-element Flux.OneHotVector:
false
false
true</code></pre><p>The inverse is <code>argmax</code> (which can take a general probability distribution, as well as just booleans).</p><pre><code class="language-julia">julia&gt; argmax(ans, [:a, :b, :c])
:c
julia&gt; argmax([true, false, false], [:a, :b, :c])
:a
julia&gt; argmax([0.3, 0.2, 0.5], [:a, :b, :c])
:c</code></pre><h2><a class="nav-anchor" id="Batches-1" href="#Batches-1">Batches</a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>argmax</code> treats matrices as batches.</p><pre><code class="language-julia">julia&gt; using Flux: onehotbatch
julia&gt; onehotbatch([:b, :a, :b], [:a, :b, :c])
3×3 Flux.OneHotMatrix:
false true false
true false true
false false false
julia&gt; onecold(ans, [:a, :b, :c])
3-element Array{Symbol,1}:
:b
:a
:b</code></pre><p>Note that these operations returned <code>OneHotVector</code> and <code>OneHotMatrix</code> rather than <code>Array</code>s. <code>OneHotVector</code>s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.</p><footer><hr/><a class="previous" href="../training/training.html"><span class="direction">Previous</span><span class="title">Training</span></a><a class="next" href="../contributing.html"><span class="direction">Next</span><span class="title">Contributing &amp; Help</span></a></footer></article></body></html>

View File

@ -1,278 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Char RNN · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="../models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="../models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="../models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="../models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="logreg.html">
Simple MNIST
</a>
</li>
<li class="current">
<a class="toctext" href="char-rnn.html">
Char RNN
</a>
<ul class="internal"></ul>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
In Action
</li>
<li>
<a href="char-rnn.html">
Char RNN
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/examples/char-rnn.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Char-RNN-1" href="#Char-RNN-1">
Char RNN
</a>
</h1>
<p>
This walkthrough will take you through a model like that used in
<a href="http://karpathy.github.io/2015/05/21/rnn-effectiveness/">
Karpathy&#39;s 2015 blog post
</a>
, which can learn to generate text in the style of Shakespeare (or whatever else you may use as input).
<code>shakespeare_input.txt</code>
is
<a href="http://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt">
here
</a>
.
</p>
<pre><code class="language-julia">using Flux
import StatsBase: wsample</code></pre>
<p>
Firstly, we define up front how many steps we want to unroll the RNN, and the number of data points to batch together. Then we create some functions to prepare our data, using Flux&#39;s built-in utilities.
</p>
<pre><code class="language-julia">nunroll = 50
nbatch = 50
getseqs(chars, alphabet) =
sequences((onehot(Float32, char, alphabet) for char in chars), nunroll)
getbatches(chars, alphabet) =
batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...)</code></pre>
<p>
Because we want the RNN to predict the next letter at each iteration, our target data is simply our input data offset by one. For example, if the input is &quot;The quick brown fox&quot;, the target will be &quot;he quick brown fox &quot;. Each letter is one-hot encoded and sequences are batched together to create the training data.
</p>
<pre><code class="language-julia">input = readstring(&quot;shakespeare_input.txt&quot;);
alphabet = unique(input)
N = length(alphabet)
# An iterator of (input, output) pairs
train = zip(getbatches(input, alphabet), getbatches(input[2:end], alphabet))
# We will evaluate the loss on a particular batch to monitor the training.
eval = tobatch.(first(drop(train, 5)))</code></pre>
<p>
Creating the model and training it is straightforward:
</p>
<pre><code class="language-julia">model = Chain(
Input(N),
LSTM(N, 256),
LSTM(256, 256),
Affine(256, N),
softmax)
m = tf(unroll(model, nunroll))
# Call this to see how the model is doing
evalcb = () -&gt; @show logloss(m(eval[1]), eval[2])
@time Flux.train!(m, train, η = 0.1, loss = logloss, cb = [evalcb])
</code></pre>
<p>
Finally, we can sample the model. For sampling we remove the
<code>softmax</code>
from the end of the chain so that we can &quot;sharpen&quot; the resulting probabilities.
</p>
<pre><code class="language-julia">function sample(model, n, temp = 1)
s = [rand(alphabet)]
m = unroll1(model)
for i = 1:n-1
push!(s, wsample(alphabet, softmax(m(unsqueeze(onehot(s[end], alphabet)))./temp)[1,:]))
end
return string(s...)
end
sample(model[1:end-1], 100)</code></pre>
<p>
<code>sample</code>
then produces a string of Shakespeare-like text. This won&#39;t produce great results after only a single epoch (though they will be recognisably different from the untrained model). Going for 30 epochs or so produces good results.
</p>
<p>
Trained on
<a href="https://gist.githubusercontent.com/MikeInnes/c2d11b57a58d7f2466b8013b88df1f1c/raw/4423f7cb07c71c80bd6458bb94f7bf5338403284/julia.jl">
a dataset from base Julia
</a>
, the network can produce code like:
</p>
<pre><code class="language-julia">function show(io::IO, md::Githompty)
Buffer(jowerTriangular(inals[i], initabs_indices), characters, side, nextfloat(typeof(x)))
isnull(r) &amp;&amp; return
start::I!
for j = 1:length(b,1)
a = s-&gt;cosvect(code)
return
end
indsERenv | maximum(func,lsg))
for i = 1:last(Abjelar) &amp;&amp; fname (=== nothing)
throw(ArgumentError(&quot;read is declave non-fast-a/remaining of not descride method names&quot;))
end
if e.ht === Int
# update file to a stroducative, but is decould.
# xna i -GB =# [unsafe_color &lt;c *has may num 20&lt;11E 16/s
tuple | Expr(:(UnitLowerTriangular(transpose,(repl.ptr)))
dims = pipe_read(s,Int(a)...)
ex,0 + y.uilid_func &amp; find_finwprevend(msg,:2)
ex = stage(c)
# uvvalue begin
end
end</code></pre>
<footer>
<hr/>
<a class="previous" href="logreg.html">
<span class="direction">
Previous
</span>
<span class="title">
Simple MNIST
</span>
</a>
<a class="next" href="../contributing.html">
<span class="direction">
Next
</span>
<span class="title">
Contributing &amp; Help
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,262 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Simple MNIST · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="../models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="../models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="../models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="../models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li class="current">
<a class="toctext" href="logreg.html">
Simple MNIST
</a>
<ul class="internal"></ul>
</li>
<li>
<a class="toctext" href="char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
In Action
</li>
<li>
<a href="logreg.html">
Simple MNIST
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/examples/logreg.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Recognising-MNIST-Digits-1" href="#Recognising-MNIST-Digits-1">
Recognising MNIST Digits
</a>
</h1>
<p>
This walkthrough example will take you through writing a multi-layer perceptron that classifies MNIST digits with high accuracy.
</p>
<p>
First, we load the data using the MNIST package:
</p>
<pre><code class="language-julia">using Flux, MNIST
using Flux: accuracy
data = [(trainfeatures(i), onehot(trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]</code></pre>
<p>
The only Flux-specific function here is
<code>onehot</code>
, which takes a class label and turns it into a one-hot-encoded vector that we can use for training. For example:
</p>
<pre><code class="language-julia">julia&gt; onehot(:b, [:a, :b, :c])
3-element Array{Int64,1}:
0
1
0</code></pre>
<p>
Otherwise, the format of the data is simple enough, it&#39;s just a list of tuples from input to output. For example:
</p>
<pre><code class="language-julia">julia&gt; data[1]
([0.0,0.0,0.0, … 0.0,0.0,0.0],[0,0,0,0,0,1,0,0,0,0])</code></pre>
<p>
<code>data[1][1]</code>
is a
<code>28*28 == 784</code>
length vector (mostly zeros due to the black background) and
<code>data[1][2]</code>
is its classification.
</p>
<p>
Now we define our model, which will simply be a function from one to the other.
</p>
<pre><code class="language-julia">m = @Chain(
Input(784),
Affine(128), relu,
Affine( 64), relu,
Affine( 10), softmax)
model = mxnet(m) # Convert to MXNet</code></pre>
<p>
We can try this out on our data already:
</p>
<pre><code class="language-julia">julia&gt; model(tobatch(data[1][1]))
10-element Array{Float64,1}:
0.10614
0.0850447
0.101474
...</code></pre>
<p>
The model gives a probability of about 0.1 to each class which is a way of saying, &quot;I have no idea&quot;. This isn&#39;t too surprising as we haven&#39;t shown it any data yet. This is easy to fix:
</p>
<pre><code class="language-julia">Flux.train!(model, train, η = 1e-3,
cb = [()-&gt;@show accuracy(m, test)])</code></pre>
<p>
The training step takes about 5 minutes (to make it faster we can do smarter things like batching). If you run this code in Juno, you&#39;ll see a progress meter, which you can hover over to see the remaining computation time.
</p>
<p>
Towards the end of the training process, Flux will have reported that the accuracy of the model is now about 90%. We can try it on our data again:
</p>
<pre><code class="language-julia">10-element Array{Float32,1}:
...
5.11423f-7
0.9354
3.1033f-5
0.000127077
...</code></pre>
<p>
Notice the class at 93%, suggesting our model is very confident about this image. We can use
<code>onecold</code>
to compare the true and predicted classes:
</p>
<pre><code class="language-julia">julia&gt; onecold(data[1][2], 0:9)
5
julia&gt; onecold(model(tobatch(data[1][1])), 0:9)
5</code></pre>
<p>
Success!
</p>
<footer>
<hr/>
<a class="previous" href="../apis/storage.html">
<span class="direction">
Previous
</span>
<span class="title">
Storing Models
</span>
</a>
<a class="next" href="char-rnn.html">
<span class="direction">
Next
</span>
<span class="title">
Char RNN
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,271 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Home · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Home · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL="."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script>
<script src="../versions.js"></script>
<link href="../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li class="current">
<a class="toctext" href="index.html">
Home
</a>
<ul class="internal">
<li>
<a class="toctext" href="#Where-do-I-start?-1">
Where do I start?
</a>
</li>
<li>
<a class="toctext" href="#Installation-1">
Installation
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
<a href="index.html">
Home
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/index.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Flux-1" href="#Flux-1">
Flux
</a>
</h1>
<p>
<em>
... Initialising Photon Beams ...
</em>
</p>
<p>
Flux is a library for machine learning, implemented in Julia. In a nutshell, it simply lets you run normal Julia code on a backend like TensorFlow. It also provides many conveniences for doing deep learning.
</p>
<p>
Flux is very flexible. You can use a convenient Keras-like API if you want something simple, but you can also drop down to straight mathematics, or build your own abstractions. You can even use Flux&#39;s utilities (like optimisers) with a completely different backend (like
<a href="https://github.com/denizyuret/Knet.jl">
Knet
</a>
) or mix and match approaches.
</p>
<p>
Note that Flux is in alpha. Many things work but the API is still in a state of... well, it might change.
</p>
<p>
<strong>
Note:
</strong>
If you&#39;re using Julia v0.5 please see
<a href="http://mikeinnes.github.io/Flux.jl/v0.1.1/">
this version
</a>
of the docs instead.
</p>
<h2>
<a class="nav-anchor" id="Where-do-I-start?-1" href="#Where-do-I-start?-1">
Where do I start?
</a>
</h2>
<p>
<em>
... Charging Ion Capacitors ...
</em>
</p>
<p>
The
<a href="examples/logreg.html">
examples
</a>
give a feel for high-level usage.
</p>
<p>
If you want to know why Flux is unique, or just don&#39;t want to see
<em>
those digits
</em>
again, check out the
<a href="models/basics.html">
model building guide
</a>
instead.
</p>
<p>
Flux is meant to be played with. These docs have lots of code snippets; try them out in
<a href="http://junolab.org">
Juno
</a>
!
</p>
<h2>
<a class="nav-anchor" id="Installation-1" href="#Installation-1">
Installation
</a>
</h2>
<p>
<em>
... Inflating Graviton Zeppelins ...
</em>
</p>
<pre><code class="language-julia">Pkg.update()
Pkg.add(&quot;Flux.jl&quot;)</code></pre>
<p>
You&#39;ll also need a backend to run real training, if you don&#39;t have one already. Choose from
<a href="https://github.com/dmlc/MXNet.jl">
MXNet
</a>
or
<a href="https://github.com/malmaud/TensorFlow.jl">
TensorFlow
</a>
(MXNet is the recommended option if you&#39;re not sure):
</p>
<pre><code class="language-julia">Pkg.add(&quot;MXNet&quot;) # or &quot;TensorFlow&quot;
Pkg.test(&quot;Flux&quot;) # Make sure everything installed properly</code></pre>
<p>
<strong>
Note:
</strong>
TensorFlow integration may not work properly on Julia v0.6 yet.
</p>
<footer>
<hr/>
<a class="next" href="models/basics.html">
<span class="direction">
Next
</span>
<span class="title">
Model Building Basics
</span>
</a>
</footer>
</article>
</body>
</html>
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li class="current"><a class="toctext" href="index.html">Home</a><ul class="internal"><li class="toplevel"><a class="toctext" href="#Installation-1">Installation</a></li></ul></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="index.html">Home</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/index.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Home</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Flux:-The-Julia-Machine-Learning-Library-1" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the <a href="https://github.com/FluxML/CuArrays.jl">GPU kernels</a>) and any part can be tweaked to your liking.</p><h1><a class="nav-anchor" id="Installation-1" href="#Installation-1">Installation</a></h1><p>Install <a href="https://julialang.org/downloads/">Julia 0.6.0 or later</a>, if you haven&#39;t already.</p><pre><code class="language-julia">Pkg.add(&quot;Flux&quot;)
Pkg.test(&quot;Flux&quot;) # Check things installed correctly</code></pre><p>Start with the <a href="basics.html">basics</a>. The <a href="https://github.com/FluxML/model-zoo/">model zoo</a> is also a good starting point for many common kinds of models.</p><footer><hr/><a class="next" href="models/basics.html"><span class="direction">Next</span><span class="title">Basics</span></a></footer></article></body></html>

View File

@ -1,169 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Internals · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL="."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script>
<script src="../versions.js"></script>
<link href="../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="contributing.html">
Contributing &amp; Help
</a>
</li>
<li class="current">
<a class="toctext" href="internals.html">
Internals
</a>
<ul class="internal"></ul>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
<a href="internals.html">
Internals
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/internals.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Internals-1" href="#Internals-1">
Internals
</a>
</h1>
<p>
[WIP]
</p>
<footer>
<hr/>
<a class="previous" href="contributing.html">
<span class="direction">
Previous
</span>
<span class="title">
Contributing &amp; Help
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,455 +1,78 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Model Building Basics · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Basics · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href="basics.html">Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li></ul></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a 
class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="basics.html">Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>. (It&#39;s a good idea to follow this example in the Julia repl.)</p><pre><code class="language-julia">W = rand(2, 5)
b = rand(2)
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li class="current">
<a class="toctext" href="basics.html">
Model Building Basics
</a>
<ul class="internal">
<li>
<a class="toctext" href="#Net-Functions-1">
Net Functions
</a>
</li>
<li>
<a class="toctext" href="#The-Model-1">
The Model
</a>
</li>
<li>
<a class="toctext" href="#Parameters-1">
Parameters
</a>
</li>
<li>
<a class="toctext" href="#Layers-1">
Layers
</a>
</li>
<li>
<a class="toctext" href="#Combining-Layers-1">
Combining Layers
</a>
</li>
<li>
<a class="toctext" href="#Dressed-like-a-model-1">
Dressed like a model
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Building Models
</li>
<li>
<a href="basics.html">
Model Building Basics
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/models/basics.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">
Model Building Basics
</a>
</h1>
<h2>
<a class="nav-anchor" id="Net-Functions-1" href="#Net-Functions-1">
Net Functions
</a>
</h2>
<p>
Flux&#39;s core feature is the
<code>@net</code>
macro, which adds some superpowers to regular ol&#39; Julia functions. Consider this simple function with the
<code>@net</code>
annotation applied:
</p>
<pre><code class="language-julia">@net f(x) = x .* x
f([1,2,3]) == [1,4,9]</code></pre>
<p>
This behaves as expected, but we have some extra features. For example, we can convert the function to run on
<a href="https://www.tensorflow.org/">
TensorFlow
</a>
or
<a href="https://github.com/dmlc/MXNet.jl">
MXNet
</a>
:
</p>
<pre><code class="language-julia">f_mxnet = mxnet(f)
f_mxnet([1,2,3]) == [1.0, 4.0, 9.0]</code></pre>
<p>
Simples! Flux took care of a lot of boilerplate for us and just ran the multiplication on MXNet. MXNet can optimise this code for us, taking advantage of parallelism or running the code on a GPU.
</p>
<p>
Using MXNet, we can get the gradient of the function, too:
</p>
<pre><code class="language-julia">back!(f_mxnet, [1,1,1], [1,2,3]) == ([2.0, 4.0, 6.0],)</code></pre>
<p>
<code>f</code>
is effectively
<code>x^2</code>
, so the gradient is
<code>2x</code>
as expected.
</p>
<h2>
<a class="nav-anchor" id="The-Model-1" href="#The-Model-1">
The Model
</a>
</h2>
<p>
The core concept in Flux is the
<em>
model
</em>
. This corresponds to what might be called a &quot;layer&quot; or &quot;module&quot; in other frameworks. A model is simply a differentiable function with parameters. Given a model
<code>m</code>
we can do things like:
</p>
<pre><code class="language-julia">m(x) # See what the model does to an input vector `x`
back!(m, Δ, x) # backpropogate the gradient `Δ` through `m`
update!(m, η) # update the parameters of `m` using the gradient</code></pre>
<p>
We can implement a model however we like as long as it fits this interface. But as hinted above,
<code>@net</code>
is a particularly easy way to do it, because it gives you these functions for free.
</p>
<h2>
<a class="nav-anchor" id="Parameters-1" href="#Parameters-1">
Parameters
</a>
</h2>
<p>
Consider how we&#39;d write a logistic regression. We just take the Julia code and add
<code>@net</code>
.
</p>
<pre><code class="language-julia">@net logistic(W, b, x) = softmax(x * W .+ b)
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
W = randn(10, 2)
b = randn(1, 2)
x = rand(1, 10) # [0.563 0.346 0.780 …] fake data
y = [1 0] # our desired classification of `x`
x, y = rand(5), rand(2) # Dummy data
loss(x, y) # ~ 3</code></pre><p>To improve the prediction we can take the gradients of <code>W</code> and <code>b</code> with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that <code>W</code> and <code>b</code> are trainable <em>parameters</em>.</p><pre><code class="language-julia">using Flux.Tracker: param, back!, data, grad
ŷ = logistic(W, b, x) # [0.46 0.54]</code></pre>
<p>
The network takes a set of 10 features (
<code>x</code>
, a row vector) and produces a classification
<code></code>
, equivalent to a probability of true vs false.
<code>softmax</code>
scales the output to sum to one, so that we can interpret it as a probability distribution.
</p>
<p>
We can use MXNet and get gradients:
</p>
<pre><code class="language-julia">logisticm = mxnet(logistic)
logisticm(W, b, x) # [0.46 0.54]
back!(logisticm, [0.1 -0.1], W, b, x) # (dW, db, dx)</code></pre>
<p>
The gradient
<code>[0.1 -0.1]</code>
says that we want to increase
<code>ŷ[1]</code>
and decrease
<code>ŷ[2]</code>
to get closer to
<code>y</code>
.
<code>back!</code>
gives us the tweaks we need to make to each input (
<code>W</code>
,
<code>b</code>
,
<code>x</code>
) in order to do this. If we add these tweaks to
<code>W</code>
and
<code>b</code>
it will predict
<code></code>
more accurately.
</p>
<p>
Treating parameters like
<code>W</code>
and
<code>b</code>
as inputs can get unwieldy in larger networks. Since they are both global we can use them directly:
</p>
<pre><code class="language-julia">@net logistic(x) = softmax(x * W .+ b)</code></pre>
<p>
However, this gives us a problem: how do we get their gradients?
</p>
<p>
Flux solves this with the
<code>Param</code>
wrapper:
</p>
<pre><code class="language-julia">W = param(randn(10, 2))
b = param(randn(1, 2))
@net logistic(x) = softmax(x * W .+ b)</code></pre>
<p>
This works as before, but now
<code>W.x</code>
stores the real value and
<code>W.Δx</code>
stores its gradient, so we don&#39;t have to manage it by hand. We can even use
<code>update!</code>
to apply the gradients automatically.
</p>
<pre><code class="language-julia">logisticm(x) # [0.46, 0.54]
W = param(W)
b = param(b)
back!(logisticm, [-1 1], x)
update!(logisticm, 0.1)
l = loss(x, y)
logisticm(x) # [0.51, 0.49]</code></pre>
<p>
Our network got a little closer to the target
<code>y</code>
. Now we just need to repeat this millions of times.
</p>
<p>
<em>
Side note:
</em>
We obviously need a way to calculate the &quot;tweak&quot;
<code>[0.1, -0.1]</code>
automatically. We can use a loss function like
<em>
mean squared error
</em>
for this:
</p>
<pre><code class="language-julia"># How wrong is ŷ?
mse([0.46, 0.54], [1, 0]) == 0.292
# What change to `ŷ` will reduce the wrongness?
back!(mse, -1, [0.46, 0.54], [1, 0]) == [0.54 -0.54]</code></pre>
<h2>
<a class="nav-anchor" id="Layers-1" href="#Layers-1">
Layers
</a>
</h2>
<p>
Bigger networks contain many affine transformations like
<code>W * x + b</code>
. We don&#39;t want to write out the definition every time we use it. Instead, we can factor this out by making a function that produces models:
</p>
<pre><code class="language-julia">function create_affine(in, out)
W = param(randn(out,in))
back!(l)</code></pre><p><code>loss(x, y)</code> returns the same number, but it&#39;s now a <em>tracked</em> value that records gradients as it goes along. Calling <code>back!</code> then calculates the gradient of <code>W</code> and <code>b</code>. We can see what this gradient is, and modify <code>W</code> to train the model.</p><pre><code class="language-julia">grad(W)
W.data .-= 0.1grad(W)
loss(x, y) # ~ 2.5</code></pre><p>The loss has decreased a little, meaning that our prediction <code>x</code> is closer to the target <code>y</code>. If we have some data we can already try <a href="training/training.html">training the model</a>.</p><p>All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can <em>look</em> very different they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let&#39;s see what that looks like.</p><h2><a class="nav-anchor" id="Building-Layers-1" href="#Building-Layers-1">Building Layers</a></h2><p>It&#39;s common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> (<code>σ</code>) in between them. In the above style we could write this as:</p><pre><code class="language-julia">W1 = param(rand(3, 5))
b1 = param(rand(3))
layer1(x) = W1 * x .+ b1
W2 = param(rand(2, 3))
b2 = param(rand(2))
layer2(x) = W2 * x .+ b2
model(x) = layer2(σ.(layer1(x)))
model(rand(5)) # =&gt; 2-element vector</code></pre><p>This works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.</p><pre><code class="language-julia">function linear(in, out)
W = param(randn(out, in))
b = param(randn(out))
@net x -&gt; W * x + b
x -&gt; W * x .+ b
end
affine1 = create_affine(3,2)
affine1([1,2,3])</code></pre>
<p>
Flux has a
<a href="templates.html">
more powerful syntax
</a>
for this pattern, but also provides a bunch of layers out of the box. So we can instead write:
</p>
<pre><code class="language-julia">affine1 = Affine(5, 5)
affine2 = Affine(5, 5)
linear1 = linear(5, 3) # we can access linear1.W etc
linear2 = linear(3, 2)
softmax(affine1(x)) # [0.167952 0.186325 0.176683 0.238571 0.23047]
softmax(affine2(x)) # [0.125361 0.246448 0.21966 0.124596 0.283935]</code></pre>
<h2>
<a class="nav-anchor" id="Combining-Layers-1" href="#Combining-Layers-1">
Combining Layers
</a>
</h2>
<p>
A more complex model usually involves many basic layers like
<code>affine</code>
, where we use the output of one layer as the input to the next:
</p>
<pre><code class="language-julia">mymodel1(x) = softmax(affine2(σ(affine1(x))))
mymodel1(x1) # [0.187935, 0.232237, 0.169824, 0.230589, 0.179414]</code></pre>
<p>
This syntax is again a little unwieldy for larger networks, so Flux provides another template of sorts to create the function for us:
</p>
<pre><code class="language-julia">mymodel2 = Chain(affine1, σ, affine2, softmax)
mymodel2(x2) # [0.187935, 0.232237, 0.169824, 0.230589, 0.179414]</code></pre>
<p>
<code>mymodel2</code>
is exactly equivalent to
<code>mymodel1</code>
because it simply calls the provided functions in sequence. We don&#39;t have to predefine the affine layers and can also write this as:
</p>
<pre><code class="language-julia">mymodel3 = Chain(
Affine(5, 5), σ,
Affine(5, 5), softmax)</code></pre>
<h2>
<a class="nav-anchor" id="Dressed-like-a-model-1" href="#Dressed-like-a-model-1">
Dressed like a model
</a>
</h2>
<p>
We noted above that a model is a function with trainable parameters. Normal functions like
<code>exp</code>
are actually models too they just happen to have 0 parameters. Flux doesn&#39;t care, and anywhere that you use one, you can use the other. For example,
<code>Chain</code>
will happily work with regular functions:
</p>
<pre><code class="language-julia">foo = Chain(exp, sum, log)
foo([1,2,3]) == 3.408 == log(sum(exp([1,2,3])))</code></pre>
<footer>
<hr/>
<a class="previous" href="../index.html">
<span class="direction">
Previous
</span>
<span class="title">
Home
</span>
</a>
<a class="next" href="templates.html">
<span class="direction">
Next
</span>
<span class="title">
Model Templates
</span>
</a>
</footer>
</article>
</body>
</html>
model(x) = linear2(σ.(linear1(x)))
model(x) # =&gt; 2-element vector</code></pre><p>Another (equivalent) way is to create a struct that explicitly represents the affine layer.</p><pre><code class="language-julia">struct Affine
W
b
end
Affine(in::Integer, out::Integer) =
Affine(param(randn(out, in)), param(randn(out)))
# Overload call, so the object can be used as a function
(m::Affine)(x) = m.W * x .+ m.b
a = Affine(10, 5)
a(rand(10)) # =&gt; 5-element vector</code></pre><p>Congratulations! You just built the <code>Dense</code> layer that comes with Flux. Flux has many interesting layers available, but they&#39;re all things you could have built yourself very easily.</p><p>(There is one small difference with <code>Dense</code> for convenience it also takes an activation function, like <code>Dense(10, 5, σ)</code>.)</p><h2><a class="nav-anchor" id="Stacking-It-Up-1" href="#Stacking-It-Up-1">Stacking It Up</a></h2><p>It&#39;s pretty common to write models that look something like:</p><pre><code class="language-julia">layer1 = Dense(10, 5, σ)
# ...
model(x) = layer3(layer2(layer1(x)))</code></pre><p>For long chains, it might be a bit more intuitive to have a list of layers, like this:</p><pre><code class="language-julia">using Flux
layers = [Dense(10, 5, σ), Dense(5, 2), softmax]
model(x) = foldl((x, m) -&gt; m(x), x, layers)
model(rand(10)) # =&gt; 2-element vector</code></pre><p>Handily, this is also provided for in Flux:</p><pre><code class="language-julia">model2 = Chain(
Dense(10, 5, σ),
Dense(5, 2),
softmax)
model2(rand(10)) # =&gt; 2-element vector</code></pre><p>This quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.</p><p>A nice property of this approach is that because &quot;models&quot; are just functions (possibly with trainable parameters), you can also see this as simple function composition.</p><pre><code class="language-julia">m = Dense(5, 2) ∘ Dense(10, 5, σ)
m(rand(10))</code></pre><p>Likewise, <code>Chain</code> will happily work with any Julia function.</p><pre><code class="language-julia">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) # =&gt; 26</code></pre><footer><hr/><a class="previous" href="../index.html"><span class="direction">Previous</span><span class="title">Home</span></a><a class="next" href="recurrence.html"><span class="direction">Next</span><span class="title">Recurrence</span></a></footer></article></body></html>

View File

@ -1,262 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Debugging · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="recurrent.html">
Recurrence
</a>
</li>
<li class="current">
<a class="toctext" href="debugging.html">
Debugging
</a>
<ul class="internal"></ul>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Building Models
</li>
<li>
<a href="debugging.html">
Debugging
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/models/debugging.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Debugging-Models-1" href="#Debugging-Models-1">
Debugging Models
</a>
</h1>
<p>
Let&#39;s take our two-layer perceptron as an example again, running on MXNet:
</p>
<pre><code class="language-julia">@net type TLP
first
second
function (x)
l1 = σ(first(x))
l2 = softmax(second(l1))
end
end
model = TLP(Affine(10, 20), Affine(21, 15))
mxmodel = mxnet(model)
mxmodel(rand(10))</code></pre>
<p>
Unfortunately, this model has a (fairly obvious) typo, which means that the code above won&#39;t run. Instead we get an error message:
</p>
<pre><code class="language-julia">Error in operator dot2: [21:28:21] src/operator/tensor/./matrix_op-inl.h:460:
Check failed: lshape[1] == rshape[0] (20 vs. 21) dot shape error: (1,20) X (21,15)
Flux.Affine at affine.jl:8
TLP at basic.jl:6
(::Flux.MX.Model)(::Flux.Batch{Array{Float64,1},Array{Float64,2}}) at model.jl:105
(::Flux.MX.Model)(::Array{Float64,1}) at model.jl:107</code></pre>
<p>
Most frameworks would only give the error message here – not so helpful if you have thousands of nodes in your computational graph. However, Flux is able to give good error reports
<em>
even when no Julia code has been run
</em>
, e.g. when running on a backend like MXNet. This enables us to pinpoint the source of the error very quickly even in a large model.
</p>
<p>
In this case, we can immediately see that the error occurred within an
<code>Affine</code>
layer. There are two such layers, but this one was called from the second line of
<code>TLP</code>
, so it must be the second
<code>Affine</code>
layer we defined. The layer expected an input of length 21 but got 20 instead.
</p>
<p>
Of course, often a stack trace isn&#39;t enough to figure out the source of an error. Another option is to simply step through the execution of the model using Gallium. While handy, however, stepping isn&#39;t always the best way to get a &quot;bird&#39;s eye view&quot; of the code. For that, Flux provides a macro called
<code>@shapes</code>
:
</p>
<pre><code class="language-julia">julia&gt; @shapes model(rand(5,10))
# /Users/mike/test.jl, line 18:
gull = σ(Affine(10, 20)(Input()[1]::(5,10))::(5,20))::(5,20)
# /Users/mike/.julia/v0.6/Flux/src/layers/affine.jl, line 8:
lobster = gull * _::(21,15) + _::(1,15)
# /Users/mike/test.jl, line 19:
raven = softmax(lobster)</code></pre>
<p>
This is a lot like Julia&#39;s own
<code>code_warntype</code>
; but instead of annotating expressions with types, we display their shapes. As a lowered form it has some quirks; input arguments are represented by
<code>Input()[N]</code>
and parameters by an underscore.
</p>
<p>
This makes the problem fairly obvious. We tried to multiply the output of the first layer
<code>(5, 20)</code>
by a parameter
<code>(21, 15)</code>
; the inner dimensions should have been equal.
</p>
<p>
Notice that while the first
<code>Affine</code>
layer is displayed as-is, the second was inlined and we see a reference to where the
<code>W * x + b</code>
line was defined in Flux&#39;s source code. In this way Flux makes it easy to drill down into problem areas, without showing you the full graph of thousands of nodes at once.
</p>
<p>
With the typo fixed, the output of
<code>@shapes</code>
looks as follows:
</p>
<pre><code class="language-julia"># /Users/mike/test.jl, line 18:
opossum = σ(Affine(10, 20)(Input()[1]::(5,10))::(5,20))::(5,20)
# /Users/mike/test.jl, line 19:
wren = softmax(Affine(20, 15)(opossum)::(5,15))::(5,15)</code></pre>
<footer>
<hr/>
<a class="previous" href="recurrent.html">
<span class="direction">
Previous
</span>
<span class="title">
Recurrence
</span>
</a>
<a class="next" href="../apis/batching.html">
<span class="direction">
Next
</span>
<span class="title">
Batching
</span>
</a>
</footer>
</article>
</body>
</html>

14
stable/models/layers.html Normal file
View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Layer Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li class="current"><a class="toctext" href="layers.html">Layer Reference</a><ul class="internal"><li><a class="toctext" href="#Model-Layers-1">Model Layers</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="layers.html">Layer Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Layer Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Model-Layers-1" href="#Model-Layers-1">Model Layers</a></h2><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in sequence on a given input.</p><pre><code class="language-none">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) == 26
m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L1-L16">source</a><br/></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>in</code>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L40-L49">source</a><br/></section><footer><hr/><a class="previous" href="recurrence.html"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../training/optimisers.html"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>

View File

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Recurrence · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li class="current"><a class="toctext" href="recurrence.html">Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li><li><a class="toctext" href="#Truncating-Gradients-1">Truncating Gradients</a></li></ul></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" 
href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="recurrence.html">Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
y₂ = f(x₂)
y₃ = f(x₃)
# ...</code></pre><p>Recurrent networks introduce a <em>hidden state</em> that gets carried over each time we run the model. The model now takes the old <code>h</code> as an input, and produces a new <code>h</code> as output, each time we run it.</p><pre><code class="language-julia">h = # ... initial state ...
y₁, h = f(x₁, h)
y₂, h = f(x₂, h)
y₃, h = f(x₃, h)
# ...</code></pre><p>Information stored in <code>h</code> is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given <code>x</code> depends on all the inputs previously fed into the model.</p><p>(This might be important if, for example, each <code>x</code> represents one word of a sentence; the model&#39;s interpretation of the word &quot;bank&quot; should change if the previous input was &quot;river&quot; rather than &quot;investment&quot;.)</p><p>Flux&#39;s RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard <code>Dense</code> layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.</p><pre><code class="language-julia">Wxh = randn(5, 10)
Whh = randn(5, 5)
b = randn(5)
function rnn(h, x)
h = tanh.(Wxh * x .+ Whh * h .+ b)
return h, h
end
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = rnn(h, x)</code></pre><p>If you run the last line a few times, you&#39;ll notice the output <code>y</code> changing slightly even though the input <code>x</code> is the same.</p><p>We sometimes refer to functions like <code>rnn</code> above, which explicitly manage state, as recurrent <em>cells</em>. There are various recurrent cells available, which are documented in the <a href="models/layers.html">layer reference</a>. The hand-written example above can be replaced with:</p><pre><code class="language-julia">using Flux
m = Flux.RNNCell(10, 5)
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = m(h, x)</code></pre><h2><a class="nav-anchor" id="Stateful-Models-1" href="#Stateful-Models-1">Stateful Models</a></h2><p>For the most part, we don&#39;t want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the <code>Recur</code> wrapper to do this.</p><pre><code class="language-julia">x = rand(10)
h = rand(5)
m = Flux.Recur(rnn, h)
y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p><p>If you use the <code>RNN(10, 5)</code> constructor as opposed to <code>RNNCell</code> you&#39;ll see that it&#39;s simply a wrapped cell.</p><pre><code class="language-julia">julia&gt; RNN(10, 5)
Recur(RNNCell(Dense(15, 5)))</code></pre><h2><a class="nav-anchor" id="Sequences-1" href="#Sequences-1">Sequences</a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">map(m, seq) # returns a list of 5-element vectors</code></pre><p>To make this a bit more convenient, Flux has the <code>Seq</code> type. This is just a list, but tagged so that we know it&#39;s meant to be used as a sequence of data points.</p><pre><code class="language-julia">seq = Seq([rand(10) for i = 1:10])
m(seq) # returns a new Seq of length 10</code></pre><p>When we apply the model <code>m</code> to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.</p><p>You can get this behaviour more generally with the <code>Over</code> wrapper.</p><pre><code class="language-julia">m = Over(Dense(10,5))
m(seq) # returns a new Seq of length 10</code></pre><h2><a class="nav-anchor" id="Truncating-Gradients-1" href="#Truncating-Gradients-1">Truncating Gradients</a></h2><p>By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling <code>back!</code> will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients this accumulates and quickly becomes expensive.</p><p>To avoid this we can <em>truncate</em> the gradient calculation, forgetting the history.</p><pre><code class="language-julia">truncate!(m)</code></pre><p>Calling <code>truncate!</code> wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation.</p><footer><hr/><a class="previous" href="basics.html"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="layers.html"><span class="direction">Next</span><span class="title">Layer Reference</span></a></footer></article></body></html>

View File

@ -1,269 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Recurrence · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="templates.html">
Model Templates
</a>
</li>
<li class="current">
<a class="toctext" href="recurrent.html">
Recurrence
</a>
<ul class="internal"></ul>
</li>
<li>
<a class="toctext" href="debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Building Models
</li>
<li>
<a href="recurrent.html">
Recurrence
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/models/recurrent.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">
Recurrent Models
</a>
</h1>
<p>
<a href="https://en.wikipedia.org/wiki/Recurrent_neural_network">
Recurrence
</a>
is a first-class feature in Flux and recurrent models are very easy to build and use. Recurrences are often illustrated as cycles or self-dependencies in the graph; they can also be thought of as a hidden output from / input to the network. For example, for a sequence of inputs
<code>x1, x2, x3 ...</code>
we produce predictions as follows:
</p>
<pre><code class="language-julia">y1 = f(W, x1) # `f` is the model, `W` represents the parameters
y2 = f(W, x2)
y3 = f(W, x3)
...</code></pre>
<p>
Each evaluation is independent and the prediction made for a given input will always be the same. That makes a lot of sense for, say, MNIST images, but less sense when predicting a sequence. For that case we introduce the hidden state:
</p>
<pre><code class="language-julia">y1, s = f(W, x1, s)
y2, s = f(W, x2, s)
y3, s = f(W, x3, s)
...</code></pre>
<p>
The state
<code>s</code>
allows the prediction to depend not only on the current input
<code>x</code>
but also on the history of past inputs.
</p>
<p>
The simplest recurrent network looks as follows in Flux, and it should be familiar if you&#39;ve seen the equations defining an RNN before:
</p>
<pre><code class="language-julia">@net type Recurrent
Wxy; Wyy; by
y
function (x)
y = tanh( x * Wxy + y{-1} * Wyy + by )
end
end</code></pre>
<p>
The only difference from a regular feed-forward layer is that we create a variable
<code>y</code>
which is defined as depending on itself. The
<code>y{-1}</code>
syntax means &quot;take the value of
<code>y</code>
from the previous run of the network&quot;.
</p>
<p>
Using recurrent layers is straightforward and no different from feedforward ones in terms of the
<code>Chain</code>
macro etc. For example:
</p>
<pre><code class="language-julia">model = Chain(
  Affine(784, 20), σ,
Recurrent(20, 30),
Recurrent(30, 15))</code></pre>
<p>
Before using the model we need to unroll it. This happens with the
<code>unroll</code>
function:
</p>
<pre><code class="language-julia">unroll(model, 20)</code></pre>
<p>
This call creates an unrolled, feed-forward version of the model which accepts N (= 20) inputs and generates N predictions at a time. Essentially, the model is replicated N times and Flux ties the hidden outputs
<code>y</code>
to hidden inputs.
</p>
<p>
Here&#39;s a more complex recurrent layer, an LSTM, and again it should be familiar if you&#39;ve seen the
<a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">
equations
</a>
:
</p>
<pre><code class="language-julia">@net type LSTM
Wxf; Wyf; bf
Wxi; Wyi; bi
Wxo; Wyo; bo
Wxc; Wyc; bc
y; state
function (x)
# Gates
forget = σ( x * Wxf + y{-1} * Wyf + bf )
input = σ( x * Wxi + y{-1} * Wyi + bi )
output = σ( x * Wxo + y{-1} * Wyo + bo )
# State update and output
state = tanh( x * Wxc + y{-1} * Wyc + bc )
state = forget .* state{-1} + input .* state
y = output .* tanh(state)
end
end</code></pre>
<p>
The only unfamiliar part is that we have to define all of the parameters of the LSTM upfront, which adds a few lines at the beginning.
</p>
<footer>
<hr/>
<a class="previous" href="templates.html">
<span class="direction">
Previous
</span>
<span class="title">
Model Templates
</span>
</a>
<a class="next" href="debugging.html">
<span class="direction">
Next
</span>
<span class="title">
Debugging
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,304 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Model Templates · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="../assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL=".."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script>
<script src="../../versions.js"></script>
<link href="../../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="../search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="../index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="basics.html">
Model Building Basics
</a>
</li>
<li class="current">
<a class="toctext" href="templates.html">
Model Templates
</a>
<ul class="internal">
<li>
<a class="toctext" href="#Models-in-templates-1">
Models in templates
</a>
</li>
<li>
<a class="toctext" href="#Supported-syntax-1">
Supported syntax
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="../apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="../apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="../apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="../examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="../examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="../contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="../internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article id="docs">
<header>
<nav>
<ul>
<li>
Building Models
</li>
<li>
<a href="templates.html">
Model Templates
</a>
</li>
</ul>
<a class="edit-page" href="https://github.com/MikeInnes/Flux.jl/tree/7a85eff370b7c68d587b49699fa3f71e44993397/docs/src/models/templates.md">
<span class="fa">
</span>
Edit on GitHub
</a>
</nav>
<hr/>
</header>
<h1>
<a class="nav-anchor" id="Model-Templates-1" href="#Model-Templates-1">
Model Templates
</a>
</h1>
<p>
We mentioned that we could factor out the repetition of defining affine layers with something like:
</p>
<pre><code class="language-julia">function create_affine(in, out)
W = param(randn(out,in))
b = param(randn(out))
@net x -&gt; W * x + b
end</code></pre>
<p>
<code>@net type</code>
syntax provides a shortcut for this:
</p>
<pre><code class="language-julia">@net type MyAffine
W
b
x -&gt; x * W + b
end
# Convenience constructor
MyAffine(in::Integer, out::Integer) =
MyAffine(randn(out, in), randn(out))
model = Chain(MyAffine(5, 5), MyAffine(5, 5))
model(x1) # [-1.54458,0.492025,0.88687,1.93834,-4.70062]</code></pre>
<p>
This is almost exactly how
<code>Affine</code>
is defined in Flux itself. Using
<code>@net type</code>
gives us some extra conveniences:
</p>
<ul>
<li>
<p>
It creates a default constructor
<code>MyAffine(::AbstractArray, ::AbstractArray)</code>
which initialises
<code>param</code>
s for us;
</p>
</li>
<li>
<p>
It subtypes
<code>Flux.Model</code>
to explicitly mark this as a model;
</p>
</li>
<li>
<p>
We can easily define custom constructors or instantiate
<code>Affine</code>
with arbitrary weights of our choosing;
</p>
</li>
<li>
<p>
We can dispatch on the
<code>Affine</code>
type, for example to override how it gets converted to MXNet, or to hook into shape inference.
</p>
</li>
</ul>
<h2>
<a class="nav-anchor" id="Models-in-templates-1" href="#Models-in-templates-1">
Models in templates
</a>
</h2>
<p>
<code>@net</code>
models can contain sub-models as well as just array parameters:
</p>
<pre><code class="language-julia">@net type TLP
first
second
function (x)
l1 = σ(first(x))
l2 = softmax(second(l1))
end
end</code></pre>
<p>
Clearly, the
<code>first</code>
and
<code>second</code>
parameters are not arrays here, but should be models themselves, and produce a result when called with an input array
<code>x</code>
. The
<code>Affine</code>
layer fits the bill, so we can instantiate
<code>TLP</code>
with two of them:
</p>
<pre><code class="language-julia">model = TLP(Affine(10, 20),
Affine(20, 15))
x1 = rand(20)
model(x1) # [0.057852,0.0409741,0.0609625,0.0575354 ...</code></pre>
<p>
You may recognise this as being equivalent to
</p>
<pre><code class="language-julia">Chain(
  Affine(10, 20), σ,
Affine(20, 15), softmax)</code></pre>
<h2>
<a class="nav-anchor" id="Supported-syntax-1" href="#Supported-syntax-1">
Supported syntax
</a>
</h2>
<p>
The syntax used to define a forward pass like
<code>x -&gt; x*W + b</code>
behaves exactly like Julia code for the most part. However, it&#39;s important to remember that it&#39;s defining a dataflow graph, not a general Julia expression. In practice this means that anything side-effectful, or things like control flow and
<code>println</code>
s, won&#39;t work as expected. In future we&#39;ll continue to expand support for Julia syntax and features.
</p>
<footer>
<hr/>
<a class="previous" href="basics.html">
<span class="direction">
Previous
</span>
<span class="title">
Model Building Basics
</span>
</a>
<a class="next" href="recurrent.html">
<span class="direction">
Next
</span>
<span class="title">
Recurrence
</span>
</a>
</footer>
</article>
</body>
</html>

View File

@ -1,153 +1,9 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>
Search · Flux
</title>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Search · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.5.0/styles/default.min.css" rel="stylesheet" type="text/css"/>
<link href="https://fonts.googleapis.com/css?family=Lato|Ubuntu+Mono" rel="stylesheet" type="text/css"/>
<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
<link href="assets/documenter.css" rel="stylesheet" type="text/css"/>
<script>
documenterBaseURL="."
</script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script>
<script src="../versions.js"></script>
<link href="../flux.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<nav class="toc">
<h1>
Flux
</h1>
<form class="search" action="search.html">
<select id="version-selector" onChange="window.location.href=this.value">
<option value="#" selected="selected" disabled="disabled">
Version
</option>
</select>
<input id="search-query" name="q" type="text" placeholder="Search docs"/>
</form>
<ul>
<li>
<a class="toctext" href="index.html">
Home
</a>
</li>
<li>
<span class="toctext">
Building Models
</span>
<ul>
<li>
<a class="toctext" href="models/basics.html">
Model Building Basics
</a>
</li>
<li>
<a class="toctext" href="models/templates.html">
Model Templates
</a>
</li>
<li>
<a class="toctext" href="models/recurrent.html">
Recurrence
</a>
</li>
<li>
<a class="toctext" href="models/debugging.html">
Debugging
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
Other APIs
</span>
<ul>
<li>
<a class="toctext" href="apis/batching.html">
Batching
</a>
</li>
<li>
<a class="toctext" href="apis/backends.html">
Backends
</a>
</li>
<li>
<a class="toctext" href="apis/storage.html">
Storing Models
</a>
</li>
</ul>
</li>
<li>
<span class="toctext">
In Action
</span>
<ul>
<li>
<a class="toctext" href="examples/logreg.html">
Simple MNIST
</a>
</li>
<li>
<a class="toctext" href="examples/char-rnn.html">
Char RNN
</a>
</li>
</ul>
</li>
<li>
<a class="toctext" href="contributing.html">
Contributing &amp; Help
</a>
</li>
<li>
<a class="toctext" href="internals.html">
Internals
</a>
</li>
</ul>
</nav>
<article>
<header>
<nav>
<ul>
<li>
Search
</li>
</ul>
</nav>
<hr/>
</header>
<h1>
Search
</h1>
<p id="search-info">
Number of results:
<span id="search-results-number">
loading...
</span>
</p>
<ul id="search-results"></ul>
</article>
</body>
<script src="search_index.js"></script>
<script src="assets/search.js"></script>
</html>
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; Help</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa 
fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="search_index.js"></script><script src="assets/search.js"></script></html>

View File

@ -9,19 +9,11 @@ var documenterSearchIndex = {"docs": [
},
{
"location": "index.html#Flux-1",
"location": "index.html#Flux:-The-Julia-Machine-Learning-Library-1",
"page": "Home",
"title": "Flux",
"title": "Flux: The Julia Machine Learning Library",
"category": "section",
"text": "... Initialising Photon Beams ...Flux is a library for machine learning, implemented in Julia. In a nutshell, it simply lets you run normal Julia code on a backend like TensorFlow. It also provides many conveniences for doing deep learning.Flux is very flexible. You can use a convenient Keras-like API if you want something simple, but you can also drop down to straight mathematics, or build your own abstractions. You can even use Flux's utilities (like optimisers) with a completely different backend (like Knet) or mix and match approaches.Note that Flux is in alpha. Many things work but the API is still in a state of... well, it might change.Note: If you're using Julia v0.5 please see this version of the docs instead."
},
{
"location": "index.html#Where-do-I-start?-1",
"page": "Home",
"title": "Where do I start?",
"category": "section",
"text": "... Charging Ion Capacitors ...The examples give a feel for high-level usage.If you want to know why Flux is unique, or just don't want to see those digits again, check out the model building guide instead.Flux is meant to be played with. These docs have lots of code snippets; try them out in Juno!"
"text": "Flux is a library for machine learning. It comes \"batteries-included\" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the GPU kernels) and any part can be tweaked to your liking."
},
{
@ -29,107 +21,51 @@ var documenterSearchIndex = {"docs": [
"page": "Home",
"title": "Installation",
"category": "section",
"text": "... Inflating Graviton Zeppelins ...Pkg.update()\nPkg.add(\"Flux.jl\")You'll also need a backend to run real training, if you don't have one already. Choose from MXNet or TensorFlow (MXNet is the recommended option if you're not sure):Pkg.add(\"MXNet\") # or \"TensorFlow\"\nPkg.test(\"Flux\") # Make sure everything installed properlyNote: TensorFlow integration may not work properly on Julia v0.6 yet."
"text": "Install Julia 0.6.0 or later, if you haven't already.Pkg.add(\"Flux\")\nPkg.test(\"Flux\") # Check things installed correctlyStart with the basics. The model zoo is also a good starting point for many common kinds of models."
},
{
"location": "models/basics.html#",
"page": "Model Building Basics",
"title": "Model Building Basics",
"page": "Basics",
"title": "Basics",
"category": "page",
"text": ""
},
{
"location": "models/basics.html#Model-Building-Basics-1",
"page": "Model Building Basics",
"title": "Model Building Basics",
"page": "Basics",
"title": "Model-Building Basics",
"category": "section",
"text": ""
},
{
"location": "models/basics.html#Net-Functions-1",
"page": "Model Building Basics",
"title": "Net Functions",
"location": "models/basics.html#Taking-Gradients-1",
"page": "Basics",
"title": "Taking Gradients",
"category": "section",
"text": "Flux's core feature is the @net macro, which adds some superpowers to regular ol' Julia functions. Consider this simple function with the @net annotation applied:@net f(x) = x .* x\nf([1,2,3]) == [1,4,9]This behaves as expected, but we have some extra features. For example, we can convert the function to run on TensorFlow or MXNet:f_mxnet = mxnet(f)\nf_mxnet([1,2,3]) == [1.0, 4.0, 9.0]Simples! Flux took care of a lot of boilerplate for us and just ran the multiplication on MXNet. MXNet can optimise this code for us, taking advantage of parallelism or running the code on a GPU.Using MXNet, we can get the gradient of the function, too:back!(f_mxnet, [1,1,1], [1,2,3]) == ([2.0, 4.0, 6.0],)f is effectively x^2, so the gradient is 2x as expected."
"text": "Consider a simple linear regression, which tries to predict an output array y from an input x. (It's a good idea to follow this example in the Julia repl.)W = rand(2, 5)\nb = rand(2)\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nloss(x, y) # ~ 3To improve the prediction we can take the gradients of W and b with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that W and b are trainable parameters.using Flux.Tracker: param, back!, data, grad\n\nW = param(W)\nb = param(b)\n\nl = loss(x, y)\n\nback!(l)loss(x, y) returns the same number, but it's now a tracked value that records gradients as it goes along. Calling back! then calculates the gradient of W and b. We can see what this gradient is, and modify W to train the model.grad(W)\n\nW.data .-= 0.1grad(W)\n\nloss(x, y) # ~ 2.5The loss has decreased a little, meaning that our prediction x is closer to the target y. If we have some data we can already try training the model.All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can look very different they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let's see what that looks like."
},
{
"location": "models/basics.html#The-Model-1",
"page": "Model Building Basics",
"title": "The Model",
"location": "models/basics.html#Building-Layers-1",
"page": "Basics",
"title": "Building Layers",
"category": "section",
"text": "The core concept in Flux is the model. This corresponds to what might be called a \"layer\" or \"module\" in other frameworks. A model is simply a differentiable function with parameters. Given a model m we can do things like:m(x) # See what the model does to an input vector `x`\nback!(m, Δ, x) # backpropogate the gradient `Δ` through `m`\nupdate!(m, η) # update the parameters of `m` using the gradientWe can implement a model however we like as long as it fits this interface. But as hinted above, @net is a particularly easy way to do it, because it gives you these functions for free."
"text": "It's common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like sigmoid (σ) in between them. In the above style we could write this as:W1 = param(rand(3, 5))\nb1 = param(rand(3))\nlayer1(x) = W1 * x .+ b1\n\nW2 = param(rand(2, 3))\nb2 = param(rand(2))\nlayer2(x) = W2 * x .+ b2\n\nmodel(x) = layer2(σ.(layer1(x)))\n\nmodel(rand(5)) # => 2-element vectorThis works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.function linear(in, out)\n W = param(randn(out, in))\n b = param(randn(out))\n x -> W * x .+ b\nend\n\nlinear1 = linear(5, 3) # we can access linear1.W etc\nlinear2 = linear(3, 2)\n\nmodel(x) = linear2(σ.(linear1(x)))\n\nmodel(x) # => 2-element vectorAnother (equivalent) way is to create a struct that explicitly represents the affine layer.struct Affine\n W\n b\nend\n\nAffine(in::Integer, out::Integer) =\n Affine(param(randn(out, in)), param(randn(out)))\n\n# Overload call, so the object can be used as a function\n(m::Affine)(x) = m.W * x .+ m.b\n\na = Affine(10, 5)\n\na(rand(10)) # => 5-element vectorCongratulations! You just built the Dense layer that comes with Flux. Flux has many interesting layers available, but they're all things you could have built yourself very easily.(There is one small difference with Dense for convenience it also takes an activation function, like Dense(10, 5, σ).)"
},
{
"location": "models/basics.html#Parameters-1",
"page": "Model Building Basics",
"title": "Parameters",
"location": "models/basics.html#Stacking-It-Up-1",
"page": "Basics",
"title": "Stacking It Up",
"category": "section",
"text": "Consider how we'd write a logistic regression. We just take the Julia code and add @net.@net logistic(W, b, x) = softmax(x * W .+ b)\n\nW = randn(10, 2)\nb = randn(1, 2)\nx = rand(1, 10) # [0.563 0.346 0.780 …] fake data\ny = [1 0] # our desired classification of `x`\n\nŷ = logistic(W, b, x) # [0.46 0.54]The network takes a set of 10 features (x, a row vector) and produces a classification ŷ, equivalent to a probability of true vs false. softmax scales the output to sum to one, so that we can interpret it as a probability distribution.We can use MXNet and get gradients:logisticm = mxnet(logistic)\nlogisticm(W, b, x) # [0.46 0.54]\nback!(logisticm, [0.1 -0.1], W, b, x) # (dW, db, dx)The gradient [0.1 -0.1] says that we want to increase ŷ[1] and decrease ŷ[2] to get closer to y. back! gives us the tweaks we need to make to each input (W, b, x) in order to do this. If we add these tweaks to W and b it will predict ŷ more accurately.Treating parameters like W and b as inputs can get unwieldy in larger networks. Since they are both global we can use them directly:@net logistic(x) = softmax(x * W .+ b)However, this gives us a problem: how do we get their gradients?Flux solves this with the Param wrapper:W = param(randn(10, 2))\nb = param(randn(1, 2))\n@net logistic(x) = softmax(x * W .+ b)This works as before, but now W.x stores the real value and W.Δx stores its gradient, so we don't have to manage it by hand. We can even use update! to apply the gradients automatically.logisticm(x) # [0.46, 0.54]\n\nback!(logisticm, [-1 1], x)\nupdate!(logisticm, 0.1)\n\nlogisticm(x) # [0.51, 0.49]Our network got a little closer to the target y. Now we just need to repeat this millions of times.Side note: We obviously need a way to calculate the \"tweak\" [0.1, -0.1] automatically. 
We can use a loss function like mean squared error for this:# How wrong is ŷ?\nmse([0.46, 0.54], [1, 0]) == 0.292\n# What change to `ŷ` will reduce the wrongness?\nback!(mse, -1, [0.46, 0.54], [1, 0]) == [0.54 -0.54]"
"text": "It's pretty common to write models that look something like:layer1 = Dense(10, 5, σ)\n# ...\nmodel(x) = layer3(layer2(layer1(x)))For long chains, it might be a bit more intuitive to have a list of layers, like this:using Flux\n\nlayers = [Dense(10, 5, σ), Dense(5, 2), softmax]\n\nmodel(x) = foldl((x, m) -> m(x), x, layers)\n\nmodel(rand(10)) # => 2-element vectorHandily, this is also provided for in Flux:model2 = Chain(\n Dense(10, 5, σ),\n Dense(5, 2),\n softmax)\n\nmodel2(rand(10)) # => 2-element vectorThis quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.A nice property of this approach is that because \"models\" are just functions (possibly with trainable parameters), you can also see this as simple function composition.m = Dense(5, 2) ∘ Dense(10, 5, σ)\n\nm(rand(10))Likewise, Chain will happily work with any Julia function.m = Chain(x -> x^2, x -> x+1)\n\nm(5) # => 26"
},
{
"location": "models/basics.html#Layers-1",
"page": "Model Building Basics",
"title": "Layers",
"category": "section",
"text": "Bigger networks contain many affine transformations like W * x + b. We don't want to write out the definition every time we use it. Instead, we can factor this out by making a function that produces models:function create_affine(in, out)\n W = param(randn(out,in))\n b = param(randn(out))\n @net x -> W * x + b\nend\n\naffine1 = create_affine(3,2)\naffine1([1,2,3])Flux has a more powerful syntax for this pattern, but also provides a bunch of layers out of the box. So we can instead write:affine1 = Affine(5, 5)\naffine2 = Affine(5, 5)\n\nsoftmax(affine1(x)) # [0.167952 0.186325 0.176683 0.238571 0.23047]\nsoftmax(affine2(x)) # [0.125361 0.246448 0.21966 0.124596 0.283935]"
},
{
"location": "models/basics.html#Combining-Layers-1",
"page": "Model Building Basics",
"title": "Combining Layers",
"category": "section",
"text": "A more complex model usually involves many basic layers like affine, where we use the output of one layer as the input to the next:mymodel1(x) = softmax(affine2(σ(affine1(x))))\nmymodel1(x1) # [0.187935, 0.232237, 0.169824, 0.230589, 0.179414]This syntax is again a little unwieldy for larger networks, so Flux provides another template of sorts to create the function for us:mymodel2 = Chain(affine1, σ, affine2, softmax)\nmymodel2(x2) # [0.187935, 0.232237, 0.169824, 0.230589, 0.179414]mymodel2 is exactly equivalent to mymodel1 because it simply calls the provided functions in sequence. We don't have to predefine the affine layers and can also write this as:mymodel3 = Chain(\n Affine(5, 5), σ,\n Affine(5, 5), softmax)"
},
{
"location": "models/basics.html#Dressed-like-a-model-1",
"page": "Model Building Basics",
"title": "Dressed like a model",
"category": "section",
"text": "We noted above that a model is a function with trainable parameters. Normal functions like exp are actually models too they just happen to have 0 parameters. Flux doesn't care, and anywhere that you use one, you can use the other. For example, Chain will happily work with regular functions:foo = Chain(exp, sum, log)\nfoo([1,2,3]) == 3.408 == log(sum(exp([1,2,3])))"
},
{
"location": "models/templates.html#",
"page": "Model Templates",
"title": "Model Templates",
"category": "page",
"text": ""
},
{
"location": "models/templates.html#Model-Templates-1",
"page": "Model Templates",
"title": "Model Templates",
"category": "section",
"text": "We mentioned that we could factor out the repetition of defining affine layers with something like:function create_affine(in, out)\n W = param(randn(out,in))\n b = param(randn(out))\n @net x -> W * x + b\nend@net type syntax provides a shortcut for this:@net type MyAffine\n W\n b\n x -> x * W + b\nend\n\n# Convenience constructor\nMyAffine(in::Integer, out::Integer) =\n MyAffine(randn(out, in), randn(out))\n\nmodel = Chain(MyAffine(5, 5), MyAffine(5, 5))\n\nmodel(x1) # [-1.54458,0.492025,0.88687,1.93834,-4.70062]This is almost exactly how Affine is defined in Flux itself. Using @net type gives us some extra conveniences:It creates default constructor MyAffine(::AbstractArray, ::AbstractArray) which initialises params for us;\nIt subtypes Flux.Model to explicitly mark this as a model;\nWe can easily define custom constructors or instantiate Affine with arbitrary weights of our choosing;\nWe can dispatch on the Affine type, for example to override how it gets converted to MXNet, or to hook into shape inference."
},
{
"location": "models/templates.html#Models-in-templates-1",
"page": "Model Templates",
"title": "Models in templates",
"category": "section",
"text": "@net models can contain sub-models as well as just array parameters:@net type TLP\n first\n second\n function (x)\n l1 = σ(first(x))\n l2 = softmax(second(l1))\n end\nendClearly, the first and second parameters are not arrays here, but should be models themselves, and produce a result when called with an input array x. The Affine layer fits the bill, so we can instantiate TLP with two of them:model = TLP(Affine(10, 20),\n Affine(20, 15))\nx1 = rand(20)\nmodel(x1) # [0.057852,0.0409741,0.0609625,0.0575354 ...You may recognise this as being equivalent toChain(\n Affine(10, 20), σ\n Affine(20, 15), softmax)"
},
{
"location": "models/templates.html#Supported-syntax-1",
"page": "Model Templates",
"title": "Supported syntax",
"category": "section",
"text": "The syntax used to define a forward pass like x -> x*W + b behaves exactly like Julia code for the most part. However, it's important to remember that it's defining a dataflow graph, not a general Julia expression. In practice this means that anything side-effectful, or things like control flow and printlns, won't work as expected. In future we'll continue to expand support for Julia syntax and features."
},
{
"location": "models/recurrent.html#",
"location": "models/recurrence.html#",
"page": "Recurrence",
"title": "Recurrence",
"category": "page",
@ -137,147 +73,147 @@ var documenterSearchIndex = {"docs": [
},
{
"location": "models/recurrent.html#Recurrent-Models-1",
"location": "models/recurrence.html#Recurrent-Models-1",
"page": "Recurrence",
"title": "Recurrent Models",
"category": "section",
"text": "Recurrence is a first-class feature in Flux and recurrent models are very easy to build and use. Recurrences are often illustrated as cycles or self-dependencies in the graph; they can also be thought of as a hidden output from / input to the network. For example, for a sequence of inputs x1, x2, x3 ... we produce predictions as follows:y1 = f(W, x1) # `f` is the model, `W` represents the parameters\ny2 = f(W, x2)\ny3 = f(W, x3)\n...Each evaluation is independent and the prediction made for a given input will always be the same. That makes a lot of sense for, say, MNIST images, but less sense when predicting a sequence. For that case we introduce the hidden state:y1, s = f(W, x1, s)\ny2, s = f(W, x2, s)\ny3, s = f(W, x3, s)\n...The state s allows the prediction to depend not only on the current input x but also on the history of past inputs.The simplest recurrent network looks as follows in Flux, and it should be familiar if you've seen the equations defining an RNN before:@net type Recurrent\n Wxy; Wyy; by\n y\n function (x)\n y = tanh( x * Wxy + y{-1} * Wyy + by )\n end\nendThe only difference from a regular feed-forward layer is that we create a variable y which is defined as depending on itself. The y{-1} syntax means \"take the value of y from the previous run of the network\".Using recurrent layers is straightforward and no different feedforward ones in terms of the Chain macro etc. For example:model = Chain(\n Affine(784, 20), σ\n Recurrent(20, 30),\n Recurrent(30, 15))Before using the model we need to unroll it. This happens with the unroll function:unroll(model, 20)This call creates an unrolled, feed-forward version of the model which accepts N (= 20) inputs and generates N predictions at a time. 
Essentially, the model is replicated N times and Flux ties the hidden outputs y to hidden inputs.Here's a more complex recurrent layer, an LSTM, and again it should be familiar if you've seen the equations:@net type LSTM\n Wxf; Wyf; bf\n Wxi; Wyi; bi\n Wxo; Wyo; bo\n Wxc; Wyc; bc\n y; state\n function (x)\n # Gates\n forget = σ( x * Wxf + y{-1} * Wyf + bf )\n input = σ( x * Wxi + y{-1} * Wyi + bi )\n output = σ( x * Wxo + y{-1} * Wyo + bo )\n # State update and output\n state = tanh( x * Wxc + y{-1} * Wyc + bc )\n state = forget .* state{-1} + input .* state\n y = output .* tanh(state)\n end\nendThe only unfamiliar part is that we have to define all of the parameters of the LSTM upfront, which adds a few lines at the beginning."
"text": ""
},
{
"location": "models/debugging.html#",
"page": "Debugging",
"title": "Debugging",
"location": "models/recurrence.html#Recurrent-Cells-1",
"page": "Recurrence",
"title": "Recurrent Cells",
"category": "section",
"text": "In the simple feedforward case, our model m is a simple function from various inputs xᵢ to predictions yᵢ. (For example, each x might be an MNIST digit and each y a digit label.) Each prediction is completely independent of any others, and using the same x will always produce the same y.y₁ = f(x₁)\ny₂ = f(x₂)\ny₃ = f(x₃)\n# ...Recurrent networks introduce a hidden state that gets carried over each time we run the model. The model now takes the old h as an input, and produces a new h as output, each time we run it.h = # ... initial state ...\ny₁, h = f(x₁, h)\ny₂, h = f(x₂, h)\ny₃, h = f(x₃, h)\n# ...Information stored in h is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given x depends on all the inputs previously fed into the model.(This might be important if, for example, each x represents one word of a sentence; the model's interpretation of the word \"bank\" should change if the previous input was \"river\" rather than \"investment\".)Flux's RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard Dense layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.Wxh = randn(5, 10)\nWhh = randn(5, 5)\nb = randn(5)\n\nfunction rnn(h, x)\n h = tanh.(Wxh * x .+ Whh * h .+ b)\n return h, h\nend\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)If you run the last line a few times, you'll notice the output y changing slightly even though the input x is the same.We sometimes refer to functions like rnn above, which explicitly manage state, as recurrent cells. There are various recurrent cells available, which are documented in the layer reference. The hand-written example above can be replaced with:using Flux\n\nm = Flux.RNNCell(10, 5)\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)"
},
{
"location": "models/recurrence.html#Stateful-Models-1",
"page": "Recurrence",
"title": "Stateful Models",
"category": "section",
"text": "For the most part, we don't want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the Recur wrapper to do this.x = rand(10)\nh = rand(5)\n\nm = Flux.Recur(rnn, h)\n\ny = m(x)The Recur wrapper stores the state between runs in the m.state field.If you use the RNN(10, 5) constructor as opposed to RNNCell you'll see that it's simply a wrapped cell.julia> RNN(10, 5)\nRecur(RNNCell(Dense(15, 5)))"
},
{
"location": "models/recurrence.html#Sequences-1",
"page": "Recurrence",
"title": "Sequences",
"category": "section",
"text": "Often we want to work with sequences of inputs, rather than individual xs.seq = [rand(10) for i = 1:10]With Recur, applying our model to each element of a sequence is trivial:map(m, seq) # returns a list of 5-element vectorsTo make this a bit more convenient, Flux has the Seq type. This is just a list, but tagged so that we know it's meant to be used as a sequence of data points.seq = Seq([rand(10) for i = 1:10])\nm(seq) # returns a new Seq of length 10When we apply the model m to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.You can get this behaviour more generally with the Over wrapper.m = Over(Dense(10,5))\nm(seq) # returns a new Seq of length 10"
},
{
"location": "models/recurrence.html#Truncating-Gradients-1",
"page": "Recurrence",
"title": "Truncating Gradients",
"category": "section",
"text": "By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling back! will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients this accumulates and quickly becomes expensive.To avoid this we can truncate the gradient calculation, forgetting the history.truncate!(m)Calling truncate! wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation."
},
{
"location": "models/layers.html#",
"page": "Layer Reference",
"title": "Layer Reference",
"category": "page",
"text": ""
},
{
"location": "models/debugging.html#Debugging-Models-1",
"page": "Debugging",
"title": "Debugging Models",
"category": "section",
"text": "Let's take our two-layer perceptron as an example again, running on MXNet:@net type TLP\n first\n second\n function (x)\n l1 = σ(first(x))\n l2 = softmax(second(l1))\n end\nend\n\nmodel = TLP(Affine(10, 20), Affine(21, 15))\n\nmxmodel = mxnet(model)\n\nmxmodel(rand(10))Unfortunately, this model has a (fairly obvious) typo, which means that the code above won't run. Instead we get an error message:Error in operator dot2: [21:28:21] src/operator/tensor/./matrix_op-inl.h:460:\nCheck failed: lshape[1] == rshape[0] (20 vs. 21) dot shape error: (1,20) X (21,15)\nFlux.Affine at affine.jl:8\nTLP at basic.jl:6\n(::Flux.MX.Model)(::Flux.Batch{Array{Float64,1},Array{Float64,2}}) at model.jl:105\n(::Flux.MX.Model)(::Array{Float64,1}) at model.jl:107Most frameworks would only give the error message here not so helpful if you have thousands of nodes in your computational graph. However, Flux is able to give good error reports even when no Julia code has been run, e.g. when running on a backend like MXNet. This enables us to pinpoint the source of the error very quickly even in a large model.In this case, we can immediately see that the error occurred within an Affine layer. There are two such layers, but this one was called from the second line of TLP, so it must be the second Affine layer we defined. The layer expected an input of length 21 but got 20 instead.Of course, often a stack trace isn't enough to figure out the source of an error. Another option is to simply step through the execution of the model using Gallium. While handy, however, stepping isn't always the best way to get a \"bird's eye view\" of the code. 
For that, Flux provides a macro called @shapes:julia> @shapes model(rand(5,10))\n\n# /Users/mike/test.jl, line 18:\ngull = σ(Affine(10, 20)(Input()[1]::(5,10))::(5,20))::(5,20)\n# /Users/mike/.julia/v0.6/Flux/src/layers/affine.jl, line 8:\nlobster = gull * _::(21,15) + _::(1,15)\n# /Users/mike/test.jl, line 19:\nraven = softmax(lobster)This is a lot like Julia's own code_warntype; but instead of annotating expressions with types, we display their shapes. As a lowered form it has some quirks; input arguments are represented by Input()[N] and parameters by an underscore.This makes the problem fairly obvious. We tried to multiply the output of the first layer (5, 20) by a parameter (21, 15); the inner dimensions should have been equal.Notice that while the first Affine layer is displayed as-is, the second was inlined and we see a reference to where the W * x + b line was defined in Flux's source code. In this way Flux makes it easy to drill down into problem areas, without showing you the full graph of thousands of nodes at once.With the typo fixed, the output of @shapes looks as follows:# /Users/mike/test.jl, line 18:\nopossum = σ(Affine(10, 20)(Input()[1]::(5,10))::(5,20))::(5,20)\n# /Users/mike/test.jl, line 19:\nwren = softmax(Affine(20, 15)(opossum)::(5,15))::(5,15)"
"location": "models/layers.html#Flux.Chain",
"page": "Layer Reference",
"title": "Flux.Chain",
"category": "Type",
"text": "Chain(layers...)\n\nChain multiple layers / functions together, so that they are called in sequence on a given input.\n\nm = Chain(x -> x^2, x -> x+1)\nm(5) == 26\n\nm = Chain(Dense(10, 5), Dense(5, 2))\nx = rand(10)\nm(x) == m[2](m[1](x))\n\nChain also supports indexing and slicing, e.g. m[2] or m[1:end-1]. m[1:3](x) will calculate the output of the first three layers.\n\n\n\n"
},
{
"location": "apis/batching.html#",
"page": "Batching",
"title": "Batching",
"location": "models/layers.html#Flux.Dense",
"page": "Layer Reference",
"title": "Flux.Dense",
"category": "Type",
"text": "Dense(in::Integer, out::Integer, σ = identity)\n\nCreates a traditional Dense layer with parameters W and b.\n\ny = σ.(W * x .+ b)\n\nThe input x must be a vector of length in, or a batch of vectors represented as an in × N matrix. The out y will be a vector or batch of length in.\n\n\n\n"
},
{
"location": "models/layers.html#Model-Layers-1",
"page": "Layer Reference",
"title": "Model Layers",
"category": "section",
"text": "Chain\nDense"
},
{
"location": "training/optimisers.html#",
"page": "Optimisers",
"title": "Optimisers",
"category": "page",
"text": ""
},
{
"location": "apis/batching.html#Batching-1",
"page": "Batching",
"title": "Batching",
"location": "training/optimisers.html#Optimisers-1",
"page": "Optimisers",
"title": "Optimisers",
"category": "section",
"text": ""
"text": "Consider a simple linear regression. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters W and b.W = param(rand(2, 5))\nb = param(rand(2))\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nl = loss(x, y) # ~ 3\nback!(l)We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:using Flux.Tracker: data, grad\n\nfunction update()\n η = 0.1 # Learning Rate\n for p in (W, b)\n x, Δ = data(p), grad(p)\n x .-= η .* Δ # Apply the update\n Δ .= 0 # Clear the gradient\n end\nendIf we call update, the parameters W and b will change and our loss should go down.There are two pieces here: one is that we need a list of trainable parameters for the model ([W, b] in this case), and the other is the update step. In this case the update is simply gradient descent (x .-= η .* Δ), but we might choose to do something more advanced, like adding momentum.In this case, getting the variables is trivial, but you can imagine it'd be more of a pain with some complex stack of layers.m = Chain(\n Dense(10, 5, σ),\n Dense(5, 2), softmax)Instead of having to write [m[1].W, m[1].b, ...], Flux provides a params function params(m) that returns a list of all parameters in the model for you.For the update step, there's nothing whatsoever wrong with writing the loop above it'll work just fine but Flux provides various optimisers that make it more convenient.opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1\n\nopt()An optimiser takes a parameter list and returns a function that does the same thing as update above. We can pass either opt or update to our training loop, which will then run the optimiser after every mini-batch of data."
},
{
"location": "apis/batching.html#Basics-1",
"page": "Batching",
"title": "Basics",
"category": "section",
"text": "Existing machine learning frameworks and libraries represent batching, and other properties of data, only implicitly. Your machine learning data is a large N-dimensional array, which may have a shape like:100 × 50 × 256 × 256Typically, this might represent that you have (say) a batch of 100 samples, where each sample is a 50-long sequence of 256×256 images. This is great for performance, but array operations often become much more cumbersome as a result. Especially if you manipulate dimensions at runtime as an optimisation, debugging models can become extremely fiddly, with a proliferation of X × Y × Z arrays and no information about where they came from.Flux introduces a new approach where the batch dimension is represented explicitly as part of the data. For example:julia> xs = Batch([[1,2,3], [4,5,6]])\n2-element Batch of Vector{Int64}:\n [1,2,3]\n [4,5,6]Batches are represented the way we think about them; as a list of data points. We can do all the usual array operations with them, including getting the first with xs[1], iterating over them and so on. The trick is that under the hood, the data is batched into a single array:julia> rawbatch(xs)\n2×3 Array{Int64,2}:\n 1 2 3\n 4 5 6When we put a Batch object into a model, the model is ultimately working with a single array, which means there's no performance overhead and we get the full benefit of standard batching.Turning a set of vectors into a matrix is fairly easy anyway, so what's the big deal? Well, it gets more interesting as we start working with more complex data. 
Say we were working with 4×4 images:julia> xs = Batch([[1 2; 3 4], [5 6; 7 8]])\n2-element Flux.Batch of Array{Int64,2}:\n [1 2; 3 4]\n [5 6; 7 8]The raw batch array is much messier, and harder to recognise:julia> rawbatch(xs)\n2×2×2 Array{Int64,3}:\n[:, :, 1] =\n 1 3\n 5 7\n\n[:, :, 2] =\n 2 4\n 6 8Furthermore, because the batches acts like a list of arrays, we can use simple and familiar operations on it:julia> map(flatten, xs)\n2-element Array{Array{Int64,1},1}:\n [1,3,2,4]\n [5,7,6,8]flatten is simple enough over a single data point, but flattening a batched data set is more complex and you end up needing arcane array operations like mapslices. A Batch can just handle this for you for free, and more importantly it ensures that your operations are correct that you haven't mixed up your batch and data dimensions, or used the wrong array op, and so on."
},
{
"location": "apis/batching.html#Sequences-and-Nesting-1",
"page": "Batching",
"title": "Sequences and Nesting",
"category": "section",
"text": "As well as Batch, there's a structure called Seq which behaves very similarly. Let's say we have two one-hot encoded DNA sequences:julia> x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]]) # [A, T, C, G]\njulia> x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]])\n\njulia> rawbatch(x1)\n3×4 Array{Int64,2}:\n 0 1 0 0\n 1 0 0 0\n 0 0 0 1This is identical to Batch so far; but where it gets interesting is that you can actually nest these types:julia> xs = Batch([x1, x2])\n2-element Batch of Seq of Vector{Int64}:\n [[0,1,0,0],[1,0,0,0],[0,0,0,1]]\n [[0,0,1,0],[0,0,0,1],[0,0,1,0]]Again, this represents itself intuitively as a list-of-lists-of-lists, but rawbatch shows that the real underlying value is an Array{Int64,3} of shape 2×3×4."
},
{
"location": "apis/batching.html#Future-Work-1",
"page": "Batching",
"title": "Future Work",
"category": "section",
"text": "The design of batching is still a fairly early work in progress, though it's used in a few places in the system. For example, all Flux models expect to be given Batch objects which are unwrapped into raw arrays for the computation. Models will convert their arguments if necessary, so it's convenient to call a model with a single data point like f([1,2,3]).Right now, the Batch or Seq types always stack along the left-most dimension. In future, this will be customisable, and Flux will provide implementations of common functions that are generic across the batch dimension. This brings the following benefits:Code can be written in a batch-agnostic way or be generic across batching strategies.\nBatching and optimisations, like switching batch dimensions, can be expressed by the programmer with compiler support; fewer code changes are required and optimisations are guaranteed not to break the model.\nThis also opens the door for more automatic optimisations, e.g. having the compiler explore the search base of possible batching combinations.Here's a more detailed illustration of how it might look for code to be \"generic across batching\". Take for example a weight matrix W times a vector x, as used in a logistic regression or a simple neural network: W * x => y\n(10×28) * (28) => (10)If we want to work with a batch of 50 xs, one option is to stack the data into a matrix of size 28 × 50. W * x => y\n(10×28) * (28×50) => (10×50)This works, but we may find that it's slow or doesn't fit well with the rest of the model, which batches on the first dimension. For that reason we may instead want to put the data in a 50 × 28 matrix and alter the code as follows: x * W' => y\n(50×28) * (28×10) => (50×10)to make the shapes work out. This code change is not ideal; in more complex cases it can become fiddly and error-prone, and it means that the code is less reusable, tied to a particular implementation strategy.There's an alternative. 
We keep the same code, but represent the batched xs as either a Batch{Vector,1} or a Batch{Vector,2}, depending on how the data is stacked. Then we can simply overload * as follows:*(W::Matrix, x::Batch{Vector,1}) = x * W'\n*(W::Matrix, x::Batch{Vector,2}) = W * xThis means that we can always write W*x, and the code is reusable in a larger network regardless of the overall batching approach. Moreover, Julia's type system ensures there's no runtime cost to doing this, and we can compile the code appropriately for backends like TensorFlow as well."
},
{
"location": "apis/backends.html#",
"page": "Backends",
"title": "Backends",
"location": "training/training.html#",
"page": "Training",
"title": "Training",
"category": "page",
"text": ""
},
{
"location": "apis/backends.html#Backends-1",
"page": "Backends",
"title": "Backends",
"location": "training/training.html#Training-1",
"page": "Training",
"title": "Training",
"category": "section",
"text": ""
"text": "To actually train a model we need three things:A loss function, that evaluates how well a model is doing given some input data.\nA collection of data points that will be provided to the loss function.\nAn optimiser that will update the model parameters appropriately.With these we can call Flux.train!:Flux.train!(loss, data, opt)There are plenty of examples in the model zoo."
},
{
"location": "apis/backends.html#Basic-Usage-1",
"page": "Backends",
"title": "Basic Usage",
"location": "training/training.html#Loss-Functions-1",
"page": "Training",
"title": "Loss Functions",
"category": "section",
"text": "model = Chain(Affine(10, 20), σ, Affine(20, 15), softmax)\nxs = rand(10)Currently, Flux's pure-Julia backend has no optimisations. This means that callingmodel(rand(10)) #> [0.0650, 0.0655, ...]directly won't have great performance. In order to run a computationally intensive training process, we need to use a backend like MXNet or TensorFlow.This is easy to do. Just call either mxnet or tf on a model to convert it to a model of that kind:mxmodel = mxnet(model)\nmxmodel(xs) #> [0.0650, 0.0655, ...]\n# or\ntfmodel = tf(model)\ntfmodel(xs) #> [0.0650, 0.0655, ...]These new models look and feel exactly like every other model in Flux, including returning the same result when you call them, and can be trained as usual using Flux.train!(). The difference is that the computation is being carried out by a backend, which will usually give a large speedup."
"text": "The loss that we defined in basics is completely valid for training. We can also define a loss in terms of some model:m = Chain(\n Dense(784, 32, σ),\n Dense(32, 10), softmax)\n\nloss(x, y) = Flux.mse(m(x), y)The loss will almost always be defined in terms of some cost function that measures the distance of the prediction m(x) from the target y. Flux has several of these built in, like mse for mean squared error or logloss for cross entropy loss, but you can calculate it however you want."
},
{
"location": "apis/backends.html#Native-Integration-1",
"page": "Backends",
"title": "Native Integration",
"location": "training/training.html#Callbacks-1",
"page": "Training",
"title": "Callbacks",
"category": "section",
"text": "Flux aims to provide high-level APIs that work well across backends, but in some cases you may want to take advantage of features specific to a given backend. In these cases it's easy to \"drop down\" and use the backend's API directly, where appropriate. For example:using MXNet\nFlux.loadmx()\n\nmxmodel = mx.FeedForward(model)This returns a standard mx.FeedForward instance, just like you might have created using MXNet's usual API. You can then use this with MXNet's data provider implementation, custom optimisers, or distributed training processes.Same goes for TensorFlow, where it's easy to create a Tensor object:using TensorFlow\nFlux.loadtf()\n\nx = placeholder(Float32)\ny = Tensor(model, x)This makes makes it easy to take advantage of Flux's model description and debugging tools while also getting the benefit of the work put into these backends. You can check out how this looks with the integration examples here."
"text": "train! takes an additional argument, cb, that's used for callbacks so that you can observe the training process. For example:train!(loss, data, opt, cb = () -> println(\"training\"))Callbacks are called for every batch of training data. You can slow this down using Flux.throttle(f, timeout) which prevents f from being called more than once every timeout seconds.A more typical callback might look like this:test_x, test_y = # ... create single batch of test data ...\nevalcb() = @show(loss(test_x, test_y))\n\nFlux.train!(loss, data, opt,\n cb = throttle(evalcb, 5))"
},
{
"location": "apis/storage.html#",
"page": "Storing Models",
"title": "Storing Models",
"location": "data/onehot.html#",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "page",
"text": ""
},
{
"location": "apis/storage.html#Loading-and-Saving-Models-1",
"page": "Storing Models",
"title": "Loading and Saving Models",
"location": "data/onehot.html#One-Hot-Encoding-1",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "section",
"text": "model = Chain(Affine(10, 20), σ, Affine(20, 15), softmax)Since models are just simple Julia data structures, it's very easy to save and load them using any of Julia's existing serialisation formats. For example, using Julia's built-in serialize:open(io -> serialize(io, model), \"model.jls\", \"w\")\nopen(io -> deserialize(io), \"model.jls\")One issue with serialize is that it doesn't promise compatibility between major Julia versions. For longer-term storage it's good to use a package like JLD.using JLD\n@save \"model.jld\" model\n@load \"model.jld\"However, JLD will break for some models as functions are not supported on 0.5+. You can resolve that by checking out this branch.Right now this is the only storage format Flux supports. In future Flux will support loading and saving other model formats (on an as-needed basis)."
"text": "It's common to encode categorical variables (like true, false or cat, dog) in \"one-of-k\" or \"one-hot\" form. Flux provides the onehot function to make this easy.julia> using Flux: onehot\n\njulia> onehot(:b, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n true\n false\n\njulia> onehot(:c, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n false\n trueThe inverse is argmax (which can take a general probability distribution, as well as just booleans).julia> argmax(ans, [:a, :b, :c])\n:c\n\njulia> argmax([true, false, false], [:a, :b, :c])\n:a\n\njulia> argmax([0.3, 0.2, 0.5], [:a, :b, :c])\n:c"
},
{
"location": "examples/logreg.html#",
"page": "Simple MNIST",
"title": "Simple MNIST",
"category": "page",
"text": ""
},
{
"location": "examples/logreg.html#Recognising-MNIST-Digits-1",
"page": "Simple MNIST",
"title": "Recognising MNIST Digits",
"location": "data/onehot.html#Batches-1",
"page": "One-Hot Encoding",
"title": "Batches",
"category": "section",
"text": "This walkthrough example will take you through writing a multi-layer perceptron that classifies MNIST digits with high accuracy.First, we load the data using the MNIST package:using Flux, MNIST\nusing Flux: accuracy\n\ndata = [(trainfeatures(i), onehot(trainlabel(i), 0:9)) for i = 1:60_000]\ntrain = data[1:50_000]\ntest = data[50_001:60_000]The only Flux-specific function here is onehot, which takes a class label and turns it into a one-hot-encoded vector that we can use for training. For example:julia> onehot(:b, [:a, :b, :c])\n3-element Array{Int64,1}:\n 0\n 1\n 0Otherwise, the format of the data is simple enough, it's just a list of tuples from input to output. For example:julia> data[1]\n([0.0,0.0,0.0, … 0.0,0.0,0.0],[0,0,0,0,0,1,0,0,0,0])data[1][1] is a 28*28 == 784 length vector (mostly zeros due to the black background) and data[1][2] is its classification.Now we define our model, which will simply be a function from one to the other.m = @Chain(\n Input(784),\n Affine(128), relu,\n Affine( 64), relu,\n Affine( 10), softmax)\n\nmodel = mxnet(m) # Convert to MXNetWe can try this out on our data already:julia> model(tobatch(data[1][1]))\n10-element Array{Float64,1}:\n 0.10614 \n 0.0850447\n 0.101474\n ...The model gives a probability of about 0.1 to each class which is a way of saying, \"I have no idea\". This isn't too surprising as we haven't shown it any data yet. This is easy to fix:Flux.train!(model, train, η = 1e-3,\n cb = [()->@show accuracy(m, test)])The training step takes about 5 minutes (to make it faster we can do smarter things like batching). If you run this code in Juno, you'll see a progress meter, which you can hover over to see the remaining computation time.Towards the end of the training process, Flux will have reported that the accuracy of the model is now about 90%. 
We can try it on our data again:10-element Array{Float32,1}:\n ...\n 5.11423f-7\n 0.9354 \n 3.1033f-5 \n 0.000127077\n ...Notice the class at 93%, suggesting our model is very confident about this image. We can use onecold to compare the true and predicted classes:julia> onecold(data[1][2], 0:9)\n5\n\njulia> onecold(model(tobatch(data[1][1])), 0:9)\n5Success!"
},
{
"location": "examples/char-rnn.html#",
"page": "Char RNN",
"title": "Char RNN",
"category": "page",
"text": ""
},
{
"location": "examples/char-rnn.html#Char-RNN-1",
"page": "Char RNN",
"title": "Char RNN",
"category": "section",
"text": "This walkthrough will take you through a model like that used in Karpathy's 2015 blog post, which can learn to generate text in the style of Shakespeare (or whatever else you may use as input). shakespeare_input.txt is here.using Flux\nimport StatsBase: wsampleFirstly, we define up front how many steps we want to unroll the RNN, and the number of data points to batch together. Then we create some functions to prepare our data, using Flux's built-in utilities.nunroll = 50\nnbatch = 50\n\ngetseqs(chars, alphabet) =\n sequences((onehot(Float32, char, alphabet) for char in chars), nunroll)\ngetbatches(chars, alphabet) =\n batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...)Because we want the RNN to predict the next letter at each iteration, our target data is simply our input data offset by one. For example, if the input is \"The quick brown fox\", the target will be \"he quick brown fox \". Each letter is one-hot encoded and sequences are batched together to create the training data.input = readstring(\"shakespeare_input.txt\");\nalphabet = unique(input)\nN = length(alphabet)\n\n# An iterator of (input, output) pairs\ntrain = zip(getbatches(input, alphabet), getbatches(input[2:end], alphabet))\n# We will evaluate the loss on a particular batch to monitor the training.\neval = tobatch.(first(drop(train, 5)))Creating the model and training it is straightforward:model = Chain(\n Input(N),\n LSTM(N, 256),\n LSTM(256, 256),\n Affine(256, N),\n softmax)\n\nm = tf(unroll(model, nunroll))\n\n# Call this to see how the model is doing\nevalcb = () -> @show logloss(m(eval[1]), eval[2])\n\n@time Flux.train!(m, train, η = 0.1, loss = logloss, cb = [evalcb])\nFinally, we can sample the model. 
For sampling we remove the softmax from the end of the chain so that we can \"sharpen\" the resulting probabilities.function sample(model, n, temp = 1)\n s = [rand(alphabet)]\n m = unroll1(model)\n for i = 1:n-1\n push!(s, wsample(alphabet, softmax(m(unsqueeze(onehot(s[end], alphabet)))./temp)[1,:]))\n end\n return string(s...)\nend\n\nsample(model[1:end-1], 100)sample then produces a string of Shakespeare-like text. This won't produce great results after only a single epoch (though they will be recognisably different from the untrained model). Going for 30 epochs or so produces good results.Trained on a dataset from base Julia, the network can produce code like:function show(io::IO, md::Githompty)\n Buffer(jowerTriangular(inals[i], initabs_indices), characters, side, nextfloat(typeof(x)))\n isnull(r) && return\n start::I!\n for j = 1:length(b,1)\n a = s->cosvect(code)\n return\n end\n indsERenv | maximum(func,lsg))\n for i = 1:last(Abjelar) && fname (=== nothing)\n throw(ArgumentError(\"read is declave non-fast-a/remaining of not descride method names\"))\n end\n if e.ht === Int\n # update file to a stroducative, but is decould.\n # xna i -GB =# [unsafe_color <c *has may num 20<11E 16/s\n tuple | Expr(:(UnitLowerTriangular(transpose,(repl.ptr)))\n dims = pipe_read(s,Int(a)...)\n ex,0 + y.uilid_func & find_finwprevend(msg,:2)\n ex = stage(c)\n # uvvalue begin\n end\nend"
"text": "onehotbatch creates a batch (matrix) of one-hot vectors, and argmax treats matrices as batches.julia> using Flux: onehotbatch\n\njulia> onehotbatch([:b, :a, :b], [:a, :b, :c])\n3×3 Flux.OneHotMatrix:\n false true false\n true false true\n false false false\n\njulia> onecold(ans, [:a, :b, :c])\n3-element Array{Symbol,1}:\n :b\n :a\n :bNote that these operations returned OneHotVector and OneHotMatrix rather than Arrays. OneHotVectors behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly.. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood."
},
{
@ -289,27 +225,11 @@ var documenterSearchIndex = {"docs": [
},
{
"location": "contributing.html#Contributing-1",
"location": "contributing.html#Contributing-and-Help-1",
"page": "Contributing & Help",
"title": "Contributing",
"title": "Contributing & Help",
"category": "section",
"text": "If you need help, please ask on the Julia forum or on Flux's Gitter.Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by starring the repo.If you're interested in hacking on Flux, most of the code is pretty straightforward. Adding new layer definitions or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code. The compiler directory is a bit more involved and is documented in internals, but most changes won't need to touch that.If you get stuck or need anything, let us know!"
},
{
"location": "internals.html#",
"page": "Internals",
"title": "Internals",
"category": "page",
"text": ""
},
{
"location": "internals.html#Internals-1",
"page": "Internals",
"title": "Internals",
"category": "section",
"text": "[WIP]"
"text": "If you need help, please ask on the Julia forum, the slack (channel #machine-learning), or Flux's Gitter.Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by starring the repo.If you're interested in hacking on Flux, most of the code is pretty straightforward. Adding new layer definitions or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.If you get stuck or need anything, let us know!"
},
]}

1
stable/siteinfo.js Normal file
View File

@ -0,0 +1 @@
var DOCUMENTER_CURRENT_VERSION = "stable";

View File

@ -0,0 +1,30 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Optimisers · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href="optimisers.html">Optimisers</a><ul class="internal"></ul></li><li><a class="toctext" href="training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training 
Models</li><li><a href="optimisers.html">Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../models/basics.html">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">W = param(rand(2, 5))
b = param(rand(2))
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
x, y = rand(5), rand(2) # Dummy data
l = loss(x, y) # ~ 3
back!(l)</code></pre><p>We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here&#39;s one way to do that:</p><pre><code class="language-julia">using Flux.Tracker: data, grad
function update()
η = 0.1 # Learning Rate
for p in (W, b)
x, Δ = data(p), grad(p)
x .-= η .* Δ # Apply the update
Δ .= 0 # Clear the gradient
end
end</code></pre><p>If we call <code>update</code>, the parameters <code>W</code> and <code>b</code> will change and our loss should go down.</p><p>There are two pieces here: one is that we need a list of trainable parameters for the model (<code>[W, b]</code> in this case), and the other is the update step. In this case the update is simply gradient descent (<code>x .-= η .* Δ</code>), but we might choose to do something more advanced, like adding momentum.</p><p>In this case, getting the variables is trivial, but you can imagine it&#39;d be more of a pain with some complex stack of layers.</p><pre><code class="language-julia">m = Chain(
Dense(10, 5, σ),
Dense(5, 2), softmax)</code></pre><p>Instead of having to write <code>[m[1].W, m[1].b, ...]</code>, Flux provides a params function <code>params(m)</code> that returns a list of all parameters in the model for you.</p><p>For the update step, there&#39;s nothing whatsoever wrong with writing the loop above it&#39;ll work just fine but Flux provides various <em>optimisers</em> that make it more convenient.</p><pre><code class="language-julia">opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1
opt()</code></pre><p>An optimiser takes a parameter list and returns a function that does the same thing as <code>update</code> above. We can pass either <code>opt</code> or <code>update</code> to our <a href="training/training.html">training loop</a>, which will then run the optimiser after every mini-batch of data.</p><footer><hr/><a class="previous" href="../models/layers.html"><span class="direction">Previous</span><span class="title">Layer Reference</span></a><a class="next" href="training.html"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>

View File

@ -0,0 +1,17 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Training · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="optimisers.html">Optimisers</a></li><li class="current"><a class="toctext" href="training.html">Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a 
class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="training.html">Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need three things:</p><ul><li><p>A <em>loss function</em>, that evaluates how well a model is doing given some input data.</p></li><li><p>A collection of data points that will be provided to the loss function.</p></li><li><p>An <a href="optimisers.html">optimiser</a> that will update the model parameters appropriately.</p></li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(loss, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" href="#Loss-Functions-1">Loss Functions</a></h2><p>The <code>loss</code> that we defined in <a href="../models/basics.html">basics</a> is completely valid for training. We can also define a loss in terms of some model:</p><pre><code class="language-julia">m = Chain(
Dense(784, 32, σ),
Dense(32, 10), softmax)
loss(x, y) = Flux.mse(m(x), y)</code></pre><p>The loss will almost always be defined in terms of some <em>cost function</em> that measures the distance of the prediction <code>m(x)</code> from the target <code>y</code>. Flux has several of these built in, like <code>mse</code> for mean squared error or <code>logloss</code> for cross entropy loss, but you can calculate it however you want.</p><h2><a class="nav-anchor" id="Callbacks-1" href="#Callbacks-1">Callbacks</a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that&#39;s used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(loss, data, opt, cb = () -&gt; println(&quot;training&quot;))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
evalcb() = @show(loss(test_x, test_y))
Flux.train!(loss, data, opt,
cb = throttle(evalcb, 5))</code></pre><footer><hr/><a class="previous" href="optimisers.html"><span class="direction">Previous</span><span class="title">Optimisers</span></a><a class="next" href="../data/onehot.html"><span class="direction">Next</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

63
v0.3.0/assets/arrow.svg Normal file
View File

@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="16.5mm"
height="8.6603003mm"
viewBox="0 0 58.464567 30.686103"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
sodipodi:docname="arrow.svg">
<defs
id="defs4" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="11.2"
inkscape:cx="14.209234"
inkscape:cy="29.780479"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="1053"
inkscape:window-x="0"
inkscape:window-y="27"
inkscape:window-maximized="1" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-1021.6761)">
<path
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 0,1021.6761 35.433071,0 -17.716536,30.6861 z"
id="path4140"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccc" />
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

@ -0,0 +1,541 @@
/*
* The default CSS style for Documenter.jl generated sites
*
* Heavily inspired by the Julia Sphinx theme
* https://github.com/JuliaLang/JuliaDoc
* which extends the sphinx_rtd_theme
* https://github.com/snide/sphinx_rtd_theme
*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
/* fonts */
body, input {
font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif;
font-size: 16px;
color: #222;
text-rendering: optimizeLegibility;
}
pre, code {
font-family: 'Roboto Mono', Monaco, courier, monospace;
font-size: 0.90em;
}
pre code {
font-size: 1em;
}
a {
color: #2980b9;
text-decoration: none;
}
a:hover {
color: #3091d1;
}
a:visited {
color: #9b59b6;
}
body {
line-height: 1.5;
}
h1 { font-size: 1.75em; }
h2 { font-size: 1.50em; }
h3 { font-size: 1.25em; }
h4 { font-size: 1.15em; }
h5 { font-size: 1.10em; }
h6 { font-size: 1em; }
h4, h5, h6 {
margin: 1em 0;
}
img {
max-width: 100%;
}
table {
border-collapse: collapse;
margin: 1em 0;
}
th, td {
border: 1px solid #e1e4e5;
padding: 0.5em 1em;
}
th {
border-bottom-width: 2px;
}
tr:nth-child(even) {
background-color: #f3f6f6;
}
hr {
border: 0;
border-top: 1px solid #e5e5e5;
}
/* Inline code and code blocks */
code {
padding: 0.1em;
background-color: rgba(0,0,0,.04);
border-radius: 3px;
}
pre {
background-color: #f5f5f5;
border: 1px solid #dddddd;
border-radius: 3px;
padding: 0.5em;
overflow: auto;
}
pre code {
padding: 0;
background-color: initial;
}
/* Headers in admonitions and docstrings */
.admonition h1,
article section.docstring h1 {
font-size: 1.25em;
}
.admonition h2,
article section.docstring h2 {
font-size: 1.10em;
}
.admonition h3,
.admonition h4,
.admonition h5,
.admonition h6,
article section.docstring h3,
article section.docstring h4,
article section.docstring h5,
article section.docstring h6 {
font-size: 1em;
}
/* Navigation */
nav.toc {
position: fixed;
top: 0;
left: 0;
bottom: 0;
width: 20em;
overflow-y: auto;
padding: 1em 0;
background-color: #fcfcfc;
box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
}
nav.toc .logo {
margin: 0 auto;
display: block;
max-height: 6em;
max-width: 18em;
}
nav.toc h1 {
text-align: center;
margin-top: .57em;
margin-bottom: 0;
}
nav.toc select {
display: block;
height: 2em;
padding: 0 1.6em 0 1em;
min-width: 7em;
max-width: 90%;
max-width: calc(100% - 5em);
margin: 0 auto;
font-size: .83em;
border: 1px solid #c9c9c9;
border-radius: 1em;
/* TODO: doesn't seem to be centered on Safari */
text-align: center;
text-align-last: center;
appearance: none;
-moz-appearance: none;
-webkit-appearance: none;
background: white url("arrow.svg");
background-size: 1.155em;
background-repeat: no-repeat;
background-position: right;
}
nav.toc select:hover {
border: 1px solid #a0a0a0;
}
nav.toc select option {
text-align: center;
}
nav.toc input {
display: block;
height: 2em;
width: 90%;
width: calc(100% - 5em);
margin: 1.2em auto;
padding: 0 1em;
border: 1px solid #c9c9c9;
border-radius: 1em;
font-size: .83em;
}
nav.toc > ul * {
margin: 0;
}
nav.toc ul {
color: #404040;
padding: 0;
list-style: none;
}
nav.toc ul .toctext {
color: inherit;
display: block;
}
nav.toc ul a:hover {
color: #fcfcfc;
background-color: #4e4a4a;
}
nav.toc ul.internal a {
color: inherit;
display: block;
}
nav.toc ul.internal a:hover {
background-color: #d6d6d6;
}
nav.toc ul.internal {
background-color: #e3e3e3;
box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
list-style: none;
}
nav.toc ul.internal li.toplevel {
border-top: 1px solid #c9c9c9;
font-weight: bold;
}
nav.toc ul.internal li.toplevel:first-child {
border-top: none;
}
nav.toc .toctext {
padding-top: 0.3em;
padding-bottom: 0.3em;
padding-right: 1em;
}
nav.toc ul .toctext {
padding-left: 1em;
}
nav.toc ul ul .toctext {
padding-left: 2em;
}
nav.toc ul ul ul .toctext {
padding-left: 3em;
}
nav.toc li.current > .toctext {
border-top: 1px solid #c9c9c9;
border-bottom: 1px solid #c9c9c9;
color: #404040;
font-weight: bold;
background-color: white;
}
article {
margin-left: 20em;
min-width: 20em;
max-width: 48em;
padding: 2em;
}
article > header {}
article > header div#topbar {
display: none;
}
article > header nav ul {
display: inline-block;
list-style: none;
margin: 0;
padding: 0;
}
article > header nav li {
display: inline-block;
padding-right: 0.2em;
}
article > header nav li:before {
content: "»";
padding-right: 0.2em;
}
article > header .edit-page {
float: right;
}
article > footer {}
article > footer a.prev {
float: left;
}
article > footer a.next {
float: right;
}
article > footer a .direction:after {
content: ": ";
}
article hr {
margin: 1em 0;
}
article section.docstring {
border: 1px solid #ddd;
margin: 0.5em 0;
padding: 0.5em;
border-radius: 3px;
}
article section.docstring .docstring-header {
margin-bottom: 1em;
}
article section.docstring .docstring-binding {
color: #333;
font-weight: bold;
}
article section.docstring .docstring-category {
font-style: italic;
}
article section.docstring a.source-link {
float: left;
font-weight: bold;
}
.nav-anchor,
.nav-anchor:hover,
.nav-anchor:visited {
color: #333;
}
/*
* Admonitions
*
* Colors (title, body)
* warning: #f0b37e #ffedcc (orange)
* note: #6ab0de #e7f2fa (blue)
* tip: #1abc9c #dbfaf4 (green)
*/
.admonition {
border-radius: 3px;
background-color: #eeeeee;
}
.admonition-title {
border-radius: 3px 3px 0 0;
background-color: #9b9b9b;
padding: 0.15em 0.5em;
}
.admonition-text {
padding: 0.5em;
}
.admonition-text > :first-child {
margin-top: 0;
}
.admonition-text > :last-child {
margin-bottom: 0;
}
.admonition > .admonition-title:before {
font-family: "FontAwesome";
margin-right: 5px;
content: "\f06a";
}
.admonition.warning > .admonition-title {
background-color: #f0b37e;
}
.admonition.warning {
background-color: #ffedcc;
}
.admonition.note > .admonition-title {
background-color: #6ab0de;
}
.admonition.note {
background-color: #e7f2fa;
}
.admonition.tip > .admonition-title {
background-color: #1abc9c;
}
.admonition.tip {
background-color: #dbfaf4;
}
/* footnotes */
.footnote {
padding-left: 0.8em;
border-left: 2px solid #ccc;
}
/* Search page */
#search-results .category {
font-size: smaller;
}
#search-results .category:before {
content: " ";
}
/* Overriding the <code> block style of highligh.js.
* We have to override the padding and the background-color, since we style this
* part ourselves. Specifically, we style the <pre> surrounding the <code>, while
* highlight.js applies the .hljs style directly to the <code> tag.
*/
.hljs {
background-color: transparent;
padding: 0;
}
@media only screen and (max-width: 768px) {
nav.toc {
position: fixed;
overflow-y: scroll;
width: 16em;
left: -16em;
-webkit-overflow-scrolling: touch;
-webkit-transition-property: left; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: left;
transition-duration: 0.3s;
-webkit-transition-timing-function: ease-out; /* Safari */
transition-timing-function: ease-out;
z-index: 2;
}
nav.toc.show {
left: 0;
}
article {
margin-left: 0;
padding: 3em 0.9em 0 0.9em; /* top right bottom left */
overflow-wrap: break-word;
}
article > header {
position: fixed;
left: 0;
z-index: 1;
}
article > header nav, hr {
display: none;
}
article > header div#topbar {
display: block; /* is mobile */
position: fixed;
width: 100%;
height: 1.5em;
padding-top: 1em;
padding-bottom: 1em;
background-color: #fcfcfc;
box-shadow: 0 1px 3px rgba(0,0,0,.26);
top: 0;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.3s; /* Safari */
transition-property: top;
transition-duration: 0.3s;
}
article > header div#topbar.headroom--unpinned.headroom--not-top.headroom--not-bottom {
top: -4em;
-webkit-transition-property: top; /* Safari */
-webkit-transition-duration: 0.7s; /* Safari */
transition-property: top;
transition-duration: 0.7s;
}
article > header div#topbar span {
position: fixed;
width: 80%;
height: 1.5em;
margin-top: -0.1em;
margin-left: 0.9em;
font-size: 1.2em;
overflow: hidden;
}
article > header div#topbar a.fa-bars {
float: right;
padding: 0.6em;
margin-top: -0.6em;
margin-right: 0.3em;
font-size: 1.5em;
}
article > header div#topbar a.fa-bars:visited {
color: #3091d1;
}
article table {
overflow-x: auto;
display: block;
}
article div.MathJax_Display {
overflow: scroll;
}
article span.MathJax {
overflow: hidden;
}
}
@media only screen and (max-width: 320px) {
body {
font-size: 15px;
}
}

129
v0.3.0/assets/documenter.js Normal file
View File

@ -0,0 +1,129 @@
/*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
requirejs.config({
paths: {
'jquery': 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min',
'jqueryui': 'https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.12.0/jquery-ui.min',
'headroom': 'https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.3/headroom.min',
'mathjax': 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML',
'highlight': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min',
'highlight-julia': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia.min',
'highlight-julia-repl': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia-repl.min',
},
shim: {
'mathjax' : {
exports: "MathJax"
},
'highlight-julia': ['highlight'],
'highlight-julia-repl': ['highlight'],
}
});
// Load MathJax
require(['mathjax'], function(MathJax) {
MathJax.Hub.Config({
"tex2jax": {
inlineMath: [['$','$'], ['\\(','\\)']],
processEscapes: true
}
});
MathJax.Hub.Config({
config: ["MMLorHTML.js"],
jax: [
"input/TeX",
"output/HTML-CSS",
"output/NativeMML"
],
extensions: [
"MathMenu.js",
"MathZoom.js",
"TeX/AMSmath.js",
"TeX/AMSsymbols.js",
"TeX/autobold.js",
"TeX/autoload-all.js"
]
});
MathJax.Hub.Config({
TeX: { equationNumbers: { autoNumber: "AMS" } }
});
})
require(['jquery', 'highlight', 'highlight-julia', 'highlight-julia-repl'], function($, hljs) {
$(document).ready(function() {
hljs.initHighlighting();
})
})
// update the version selector with info from the siteinfo.js and ../versions.js files
require(['jquery'], function($) {
$(document).ready(function() {
var version_selector = $("#version-selector");
// add the current version to the selector based on siteinfo.js, but only if the selector is empty
if (typeof DOCUMENTER_CURRENT_VERSION !== 'undefined' && $('#version-selector > option').length == 0) {
var option = $("<option value='#' selected='selected'>" + DOCUMENTER_CURRENT_VERSION + "</option>");
version_selector.append(option);
}
if (typeof DOC_VERSIONS !== 'undefined') {
var existing_versions = $('#version-selector > option');
var existing_versions_texts = existing_versions.map(function(i,x){return x.text});
DOC_VERSIONS.forEach(function(each) {
var version_url = documenterBaseURL + "/../" + each;
var existing_id = $.inArray(each, existing_versions_texts);
// if not already in the version selector, add it as a new option,
// otherwise update the old option with the URL and enable it
if (existing_id == -1) {
var option = $("<option value='" + version_url + "'>" + each + "</option>");
version_selector.append(option);
} else {
var option = existing_versions[existing_id];
option.value = version_url;
option.disabled = false;
}
});
}
// only show the version selector if the selector has been populated
if ($('#version-selector > option').length > 0) {
version_selector.css("visibility", "visible");
}
})
})
// mobile
require(['jquery', 'headroom'], function($, Headroom) {
$(document).ready(function() {
var navtoc = $("nav.toc");
$("nav.toc li.current a.toctext").click(function() {
navtoc.toggleClass('show');
});
$("article > header div#topbar a.fa-bars").click(function(ev) {
ev.preventDefault();
navtoc.toggleClass('show');
if (navtoc.hasClass('show')) {
var title = $("article > header div#topbar span").text();
$("nav.toc ul li a:contains('" + title + "')").focus();
}
});
$("article#docs").bind('click', function(ev) {
if ($(ev.target).is('div#topbar a.fa-bars')) {
return;
}
if (navtoc.hasClass('show')) {
navtoc.removeClass('show');
}
});
if ($("article > header div#topbar").css('display') == 'block') {
var headroom = new Headroom(document.querySelector("article > header div#topbar"), {"tolerance": {"up": 10, "down": 10}});
headroom.init();
}
})
})

91
v0.3.0/assets/search.js Normal file
View File

@ -0,0 +1,91 @@
/*
* Part of Documenter.jl
* https://github.com/JuliaDocs/Documenter.jl
*
* License: MIT
*/
// parseUri 1.2.2
// (c) Steven Levithan <stevenlevithan.com>
// MIT License
function parseUri (str) {
var o = parseUri.options,
m = o.parser[o.strictMode ? "strict" : "loose"].exec(str),
uri = {},
i = 14;
while (i--) uri[o.key[i]] = m[i] || "";
uri[o.q.name] = {};
uri[o.key[12]].replace(o.q.parser, function ($0, $1, $2) {
if ($1) uri[o.q.name][$1] = $2;
});
return uri;
};
parseUri.options = {
strictMode: false,
key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
q: {
name: "queryKey",
parser: /(?:^|&)([^&=]*)=?([^&]*)/g
},
parser: {
strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
}
};
requirejs.config({
paths: {
'jquery': 'https://code.jquery.com/jquery-3.1.0.js?',
'lunr': 'https://cdnjs.cloudflare.com/ajax/libs/lunr.js/0.7.1/lunr.min',
}
});
var currentScript = document.currentScript;
require(["jquery", "lunr"], function($, lunr) {
var index = lunr(function () {
this.ref('location')
this.field('title', {boost: 10})
this.field('text')
})
var store = {}
documenterSearchIndex['docs'].forEach(function(e) {
index.add(e)
store[e.location] = e
})
$(function(){
function update_search(query) {
results = index.search(query)
$('#search-info').text("Number of results: " + results.length)
$('#search-results').empty()
results.forEach(function(result) {
data = store[result.ref]
link = $('<a>')
link.text(data.title)
link.attr('href', documenterBaseURL+'/'+result.ref)
cat = $('<span class="category">('+data.category+')</span>')
li = $('<li>').append(link).append(cat)
$('#search-results').append(li)
})
}
function update_search_box() {
query = $('#search-query').val()
update_search(query)
}
$('#search-query').keyup(update_search_box)
$('#search-query').change(update_search_box)
search_query = parseUri(window.location).queryKey["q"]
if(search_query !== undefined) {
$("#search-query").val(search_query)
}
update_search_box();
})
})

9
v0.3.0/contributing.html Normal file
View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Contributing &amp; Help · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li class="current"><a class="toctext" href="contributing.html">Contributing &amp; Help</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a 
href="contributing.html">Contributing &amp; Help</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/contributing.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Contributing &amp; Help</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Contributing-and-Help-1" href="#Contributing-and-Help-1">Contributing &amp; Help</a></h1><p>If you need help, please ask on the <a href="https://discourse.julialang.org/">Julia forum</a>, the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning), or Flux&#39;s <a href="https://gitter.im/FluxML/Lobby">Gitter</a>.</p><p>Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by <a href="https://github.com/MikeInnes/Flux.jl">starring the repo</a>.</p><p>If you&#39;re interested in hacking on Flux, most of the <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src">code</a> is pretty straightforward. Adding new <a href="https://github.com/MikeInnes/Flux.jl/tree/master/src/layers">layer definitions</a> or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.</p><p>If you get stuck or need anything, let us know!</p><footer><hr/><a class="previous" href="data/onehot.html"><span class="direction">Previous</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

40
v0.3.0/data/onehot.html Normal file
View File

@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>One-Hot Encoding · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li class="current"><a class="toctext" href="onehot.html">One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Data Munging</li><li><a href="onehot.html">One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It&#39;s common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in &quot;one-of-k&quot; or <a href="https://en.wikipedia.org/wiki/One-hot">&quot;one-hot&quot;</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia&gt; using Flux: onehot
julia&gt; onehot(:b, [:a, :b, :c])
3-element Flux.OneHotVector:
false
true
false
julia&gt; onehot(:c, [:a, :b, :c])
3-element Flux.OneHotVector:
false
false
true</code></pre><p>The inverse is <code>argmax</code> (which can take a general probability distribution, as well as just booleans).</p><pre><code class="language-julia">julia&gt; argmax(ans, [:a, :b, :c])
:c
julia&gt; argmax([true, false, false], [:a, :b, :c])
:a
julia&gt; argmax([0.3, 0.2, 0.5], [:a, :b, :c])
:c</code></pre><h2><a class="nav-anchor" id="Batches-1" href="#Batches-1">Batches</a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>argmax</code> treats matrices as batches.</p><pre><code class="language-julia">julia&gt; using Flux: onehotbatch
julia&gt; onehotbatch([:b, :a, :b], [:a, :b, :c])
3×3 Flux.OneHotMatrix:
false true false
true false true
false false false
julia&gt; argmax(ans, [:a, :b, :c])
3-element Array{Symbol,1}:
:b
:a
:b</code></pre><p>Note that these operations returned <code>OneHotVector</code> and <code>OneHotMatrix</code> rather than <code>Array</code>s. <code>OneHotVector</code>s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.</p><footer><hr/><a class="previous" href="../training/training.html"><span class="direction">Previous</span><span class="title">Training</span></a><a class="next" href="../contributing.html"><span class="direction">Next</span><span class="title">Contributing &amp; Help</span></a></footer></article></body></html>

10
v0.3.0/index.html Normal file
View File

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Home · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li class="current"><a class="toctext" href="index.html">Home</a><ul class="internal"><li class="toplevel"><a class="toctext" href="#Installation-1">Installation</a></li></ul></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href="index.html">Home</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/index.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Home</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Flux:-The-Julia-Machine-Learning-Library-1" href="#Flux:-The-Julia-Machine-Learning-Library-1">Flux: The Julia Machine Learning Library</a></h1><p>Flux is a library for machine learning. It comes &quot;batteries-included&quot; with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the <a href="https://github.com/FluxML/CuArrays.jl">GPU kernels</a>) and any part can be tweaked to your liking.</p><h1><a class="nav-anchor" id="Installation-1" href="#Installation-1">Installation</a></h1><p>Install <a href="https://julialang.org/downloads/">Julia 0.6.0 or later</a>, if you haven&#39;t already.</p><pre><code class="language-julia">Pkg.add(&quot;Flux&quot;)
Pkg.test(&quot;Flux&quot;) # Check things installed correctly</code></pre><p>Start with the <a href="models/basics.html">basics</a>. The <a href="https://github.com/FluxML/model-zoo/">model zoo</a> is also a good starting point for many common kinds of models.</p><footer><hr/><a class="next" href="models/basics.html"><span class="direction">Next</span><span class="title">Basics</span></a></footer></article></body></html>

78
v0.3.0/models/basics.html Normal file
View File

@ -0,0 +1,78 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Basics · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href="basics.html">Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li></ul></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a 
class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="basics.html">Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>. (It&#39;s a good idea to follow this example in the Julia repl.)</p><pre><code class="language-julia">W = rand(2, 5)
b = rand(2)
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
x, y = rand(5), rand(2) # Dummy data
loss(x, y) # ~ 3</code></pre><p>To improve the prediction we can take the gradients of <code>W</code> and <code>b</code> with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that <code>W</code> and <code>b</code> are trainable <em>parameters</em>.</p><pre><code class="language-julia">using Flux.Tracker: param, back!, data, grad
W = param(W)
b = param(b)
l = loss(x, y)
back!(l)</code></pre><p><code>loss(x, y)</code> returns the same number, but it&#39;s now a <em>tracked</em> value that records gradients as it goes along. Calling <code>back!</code> then calculates the gradient of <code>W</code> and <code>b</code>. We can see what this gradient is, and modify <code>W</code> to train the model.</p><pre><code class="language-julia">grad(W)
W.data .-= 0.1grad(W)
loss(x, y) # ~ 2.5</code></pre><p>The loss has decreased a little, meaning that our prediction <code>x</code> is closer to the target <code>y</code>. If we have some data we can already try <a href="../training/training.html">training the model</a>.</p><p>All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can <em>look</em> very different &#8211; they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let&#39;s see what that looks like.</p><h2><a class="nav-anchor" id="Building-Layers-1" href="#Building-Layers-1">Building Layers</a></h2><p>It&#39;s common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> (<code>σ</code>) in between them. In the above style we could write this as:</p><pre><code class="language-julia">W1 = param(rand(3, 5))
b1 = param(rand(3))
layer1(x) = W1 * x .+ b1
W2 = param(rand(2, 3))
b2 = param(rand(2))
layer2(x) = W2 * x .+ b2
model(x) = layer2(σ.(layer1(x)))
model(rand(5)) # =&gt; 2-element vector</code></pre><p>This works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.</p><pre><code class="language-julia">function linear(in, out)
W = param(randn(out, in))
b = param(randn(out))
x -&gt; W * x .+ b
end
linear1 = linear(5, 3) # we can access linear1.W etc
linear2 = linear(3, 2)
model(x) = linear2(σ.(linear1(x)))
model(x) # =&gt; 2-element vector</code></pre><p>Another (equivalent) way is to create a struct that explicitly represents the affine layer.</p><pre><code class="language-julia">struct Affine
W
b
end
Affine(in::Integer, out::Integer) =
Affine(param(randn(out, in)), param(randn(out)))
# Overload call, so the object can be used as a function
(m::Affine)(x) = m.W * x .+ m.b
a = Affine(10, 5)
a(rand(10)) # =&gt; 5-element vector</code></pre><p>Congratulations! You just built the <code>Dense</code> layer that comes with Flux. Flux has many interesting layers available, but they&#39;re all things you could have built yourself very easily.</p><p>(There is one small difference with <code>Dense</code> &#8211; for convenience it also takes an activation function, like <code>Dense(10, 5, σ)</code>.)</p><h2><a class="nav-anchor" id="Stacking-It-Up-1" href="#Stacking-It-Up-1">Stacking It Up</a></h2><p>It&#39;s pretty common to write models that look something like:</p><pre><code class="language-julia">layer1 = Dense(10, 5, σ)
# ...
model(x) = layer3(layer2(layer1(x)))</code></pre><p>For long chains, it might be a bit more intuitive to have a list of layers, like this:</p><pre><code class="language-julia">using Flux
layers = [Dense(10, 5, σ), Dense(5, 2), softmax]
model(x) = foldl((x, m) -&gt; m(x), x, layers)
model(rand(10)) # =&gt; 2-element vector</code></pre><p>Handily, this is also provided for in Flux:</p><pre><code class="language-julia">model2 = Chain(
Dense(10, 5, σ),
Dense(5, 2),
softmax)
model2(rand(10)) # =&gt; 2-element vector</code></pre><p>This quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.</p><p>A nice property of this approach is that because &quot;models&quot; are just functions (possibly with trainable parameters), you can also see this as simple function composition.</p><pre><code class="language-julia">m = Dense(5, 2) ∘ Dense(10, 5, σ)
m(rand(10))</code></pre><p>Likewise, <code>Chain</code> will happily work with any Julia function.</p><pre><code class="language-julia">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) # =&gt; 26</code></pre><footer><hr/><a class="previous" href="../index.html"><span class="direction">Previous</span><span class="title">Home</span></a><a class="next" href="recurrence.html"><span class="direction">Next</span><span class="title">Recurrence</span></a></footer></article></body></html>

14
v0.3.0/models/layers.html Normal file
View File

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Layer Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li><a class="toctext" href="recurrence.html">Recurrence</a></li><li class="current"><a class="toctext" href="layers.html">Layer Reference</a><ul class="internal"><li><a class="toctext" href="#Model-Layers-1">Model Layers</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; 
Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="layers.html">Layer Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Layer Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Model-Layers-1" href="#Model-Layers-1">Model Layers</a></h2><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in sequence on a given input.</p><pre><code class="language-none">m = Chain(x -&gt; x^2, x -&gt; x+1)
m(5) == 26
m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L1-L16">source</a><br/></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a><span class="docstring-category">Type</span>.</div><div><pre><code class="language-none">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The out <code>y</code> will be a vector or batch of length <code>out</code>.</p></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/src/layers/basic.jl#L40-L49">source</a><br/></section><footer><hr/><a class="previous" href="recurrence.html"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../training/optimisers.html"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>

View File

@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Recurrence · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="basics.html">Basics</a></li><li class="current"><a class="toctext" href="recurrence.html">Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li><li><a class="toctext" href="#Truncating-Gradients-1">Truncating Gradients</a></li></ul></li><li><a class="toctext" href="layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers.html">Optimisers</a></li><li><a class="toctext" 
href="../training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href="recurrence.html">Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
y₂ = f(x₂)
y₃ = f(x₃)
# ...</code></pre><p>Recurrent networks introduce a <em>hidden state</em> that gets carried over each time we run the model. The model now takes the old <code>h</code> as an input, and produces a new <code>h</code> as output, each time we run it.</p><pre><code class="language-julia">h = # ... initial state ...
y₁, h = f(x₁, h)
y₂, h = f(x₂, h)
y₃, h = f(x₃, h)
# ...</code></pre><p>Information stored in <code>h</code> is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given <code>x</code> depends on all the inputs previously fed into the model.</p><p>(This might be important if, for example, each <code>x</code> represents one word of a sentence; the model&#39;s interpretation of the word &quot;bank&quot; should change if the previous input was &quot;river&quot; rather than &quot;investment&quot;.)</p><p>Flux&#39;s RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard <code>Dense</code> layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.</p><pre><code class="language-julia">Wxh = randn(5, 10)
Whh = randn(5, 5)
b = randn(5)
function rnn(h, x)
h = tanh.(Wxh * x .+ Whh * h .+ b)
return h, h
end
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = rnn(h, x)</code></pre><p>If you run the last line a few times, you&#39;ll notice the output <code>y</code> changing slightly even though the input <code>x</code> is the same.</p><p>We sometimes refer to functions like <code>rnn</code> above, which explicitly manage state, as recurrent <em>cells</em>. There are various recurrent cells available, which are documented in the <a href="layers.html">layer reference</a>. The hand-written example above can be replaced with:</p><pre><code class="language-julia">using Flux
m = Flux.RNNCell(10, 5)
x = rand(10) # dummy data
h = rand(5) # initial hidden state
h, y = m(h, x)</code></pre><h2><a class="nav-anchor" id="Stateful-Models-1" href="#Stateful-Models-1">Stateful Models</a></h2><p>For the most part, we don&#39;t want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the <code>Recur</code> wrapper to do this.</p><pre><code class="language-julia">x = rand(10)
h = rand(5)
m = Flux.Recur(rnn, h)
y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p><p>If you use the <code>RNN(10, 5)</code> constructor as opposed to <code>RNNCell</code> you&#39;ll see that it&#39;s simply a wrapped cell.</p><pre><code class="language-julia">julia&gt; RNN(10, 5)
Recur(RNNCell(Dense(15, 5)))</code></pre><h2><a class="nav-anchor" id="Sequences-1" href="#Sequences-1">Sequences</a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">map(m, seq) # returns a list of 5-element vectors</code></pre><p>To make this a bit more convenient, Flux has the <code>Seq</code> type. This is just a list, but tagged so that we know it&#39;s meant to be used as a sequence of data points.</p><pre><code class="language-julia">seq = Seq([rand(10) for i = 1:10])
m(seq) # returns a new Seq of length 10</code></pre><p>When we apply the model <code>m</code> to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.</p><p>You can get this behaviour more generally with the <code>Over</code> wrapper.</p><pre><code class="language-julia">m = Over(Dense(10,5))
m(seq) # returns a new Seq of length 10</code></pre><h2><a class="nav-anchor" id="Truncating-Gradients-1" href="#Truncating-Gradients-1">Truncating Gradients</a></h2><p>By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling <code>back!</code> will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients &#8211; this accumulates and quickly becomes expensive.</p><p>To avoid this we can <em>truncate</em> the gradient calculation, forgetting the history.</p><pre><code class="language-julia">truncate!(m)</code></pre><p>Calling <code>truncate!</code> wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation.</p><footer><hr/><a class="previous" href="basics.html"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="layers.html"><span class="direction">Next</span><span class="title">Layer Reference</span></a></footer></article></body></html>

9
v0.3.0/search.html Normal file
View File

@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Search · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="assets/documenter.js"></script><script src="siteinfo.js"></script><script src="../versions.js"></script><link href="assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="models/basics.html">Basics</a></li><li><a class="toctext" href="models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="training/optimisers.html">Optimisers</a></li><li><a class="toctext" href="training/training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="contributing.html">Contributing &amp; Help</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa 
fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="search_index.js"></script><script src="assets/search.js"></script></html>

235
v0.3.0/search_index.js Normal file
View File

@ -0,0 +1,235 @@
var documenterSearchIndex = {"docs": [
{
"location": "index.html#",
"page": "Home",
"title": "Home",
"category": "page",
"text": ""
},
{
"location": "index.html#Flux:-The-Julia-Machine-Learning-Library-1",
"page": "Home",
"title": "Flux: The Julia Machine Learning Library",
"category": "section",
"text": "Flux is a library for machine learning. It comes \"batteries-included\" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the GPU kernels) and any part can be tweaked to your liking."
},
{
"location": "index.html#Installation-1",
"page": "Home",
"title": "Installation",
"category": "section",
"text": "Install Julia 0.6.0 or later, if you haven't already.Pkg.add(\"Flux\")\nPkg.test(\"Flux\") # Check things installed correctlyStart with the basics. The model zoo is also a good starting point for many common kinds of models."
},
{
"location": "models/basics.html#",
"page": "Basics",
"title": "Basics",
"category": "page",
"text": ""
},
{
"location": "models/basics.html#Model-Building-Basics-1",
"page": "Basics",
"title": "Model-Building Basics",
"category": "section",
"text": ""
},
{
"location": "models/basics.html#Taking-Gradients-1",
"page": "Basics",
"title": "Taking Gradients",
"category": "section",
"text": "Consider a simple linear regression, which tries to predict an output array y from an input x. (It's a good idea to follow this example in the Julia repl.)W = rand(2, 5)\nb = rand(2)\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nloss(x, y) # ~ 3To improve the prediction we can take the gradients of W and b with respect to the loss function and perform gradient descent. We could calculate gradients by hand, but Flux will do it for us if we tell it that W and b are trainable parameters.using Flux.Tracker: param, back!, data, grad\n\nW = param(W)\nb = param(b)\n\nl = loss(x, y)\n\nback!(l)loss(x, y) returns the same number, but it's now a tracked value that records gradients as it goes along. Calling back! then calculates the gradient of W and b. We can see what this gradient is, and modify W to train the model.grad(W)\n\nW.data .-= 0.1grad(W)\n\nloss(x, y) # ~ 2.5The loss has decreased a little, meaning that our prediction x is closer to the target y. If we have some data we can already try training the model.All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can look very different they might have millions of parameters or complex control flow, and there are ways to manage this complexity. Let's see what that looks like."
},
{
"location": "models/basics.html#Building-Layers-1",
"page": "Basics",
"title": "Building Layers",
"category": "section",
"text": "It's common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like sigmoid (σ) in between them. In the above style we could write this as:W1 = param(rand(3, 5))\nb1 = param(rand(3))\nlayer1(x) = W1 * x .+ b1\n\nW2 = param(rand(2, 3))\nb2 = param(rand(2))\nlayer2(x) = W2 * x .+ b2\n\nmodel(x) = layer2(σ.(layer1(x)))\n\nmodel(rand(5)) # => 2-element vectorThis works but is fairly unwieldy, with a lot of repetition especially as we add more layers. One way to factor this out is to create a function that returns linear layers.function linear(in, out)\n W = param(randn(out, in))\n b = param(randn(out))\n x -> W * x .+ b\nend\n\nlinear1 = linear(5, 3) # we can access linear1.W etc\nlinear2 = linear(3, 2)\n\nmodel(x) = linear2(σ.(linear1(x)))\n\nmodel(x) # => 2-element vectorAnother (equivalent) way is to create a struct that explicitly represents the affine layer.struct Affine\n W\n b\nend\n\nAffine(in::Integer, out::Integer) =\n Affine(param(randn(out, in)), param(randn(out)))\n\n# Overload call, so the object can be used as a function\n(m::Affine)(x) = m.W * x .+ m.b\n\na = Affine(10, 5)\n\na(rand(10)) # => 5-element vectorCongratulations! You just built the Dense layer that comes with Flux. Flux has many interesting layers available, but they're all things you could have built yourself very easily.(There is one small difference with Dense for convenience it also takes an activation function, like Dense(10, 5, σ).)"
},
{
"location": "models/basics.html#Stacking-It-Up-1",
"page": "Basics",
"title": "Stacking It Up",
"category": "section",
"text": "It's pretty common to write models that look something like:layer1 = Dense(10, 5, σ)\n# ...\nmodel(x) = layer3(layer2(layer1(x)))For long chains, it might be a bit more intuitive to have a list of layers, like this:using Flux\n\nlayers = [Dense(10, 5, σ), Dense(5, 2), softmax]\n\nmodel(x) = foldl((x, m) -> m(x), x, layers)\n\nmodel(rand(10)) # => 2-element vectorHandily, this is also provided for in Flux:model2 = Chain(\n Dense(10, 5, σ),\n Dense(5, 2),\n softmax)\n\nmodel2(rand(10)) # => 2-element vectorThis quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.A nice property of this approach is that because \"models\" are just functions (possibly with trainable parameters), you can also see this as simple function composition.m = Dense(5, 2) ∘ Dense(10, 5, σ)\n\nm(rand(10))Likewise, Chain will happily work with any Julia function.m = Chain(x -> x^2, x -> x+1)\n\nm(5) # => 26"
},
{
"location": "models/recurrence.html#",
"page": "Recurrence",
"title": "Recurrence",
"category": "page",
"text": ""
},
{
"location": "models/recurrence.html#Recurrent-Models-1",
"page": "Recurrence",
"title": "Recurrent Models",
"category": "section",
"text": ""
},
{
"location": "models/recurrence.html#Recurrent-Cells-1",
"page": "Recurrence",
"title": "Recurrent Cells",
"category": "section",
"text": "In the simple feedforward case, our model m is a simple function from various inputs xᵢ to predictions yᵢ. (For example, each x might be an MNIST digit and each y a digit label.) Each prediction is completely independent of any others, and using the same x will always produce the same y.y₁ = f(x₁)\ny₂ = f(x₂)\ny₃ = f(x₃)\n# ...Recurrent networks introduce a hidden state that gets carried over each time we run the model. The model now takes the old h as an input, and produces a new h as output, each time we run it.h = # ... initial state ...\ny₁, h = f(x₁, h)\ny₂, h = f(x₂, h)\ny₃, h = f(x₃, h)\n# ...Information stored in h is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given x depends on all the inputs previously fed into the model.(This might be important if, for example, each x represents one word of a sentence; the model's interpretation of the word \"bank\" should change if the previous input was \"river\" rather than \"investment\".)Flux's RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard Dense layer, and the output and hidden state are the same. By convention, the hidden state is the first input and output.Wxh = randn(5, 10)\nWhh = randn(5, 5)\nb = randn(5)\n\nfunction rnn(h, x)\n h = tanh.(Wxh * x .+ Whh * h .+ b)\n return h, h\nend\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)If you run the last line a few times, you'll notice the output y changing slightly even though the input x is the same.We sometimes refer to functions like rnn above, which explicitly manage state, as recurrent cells. There are various recurrent cells available, which are documented in the layer reference. The hand-written example above can be replaced with:using Flux\n\nm = Flux.RNNCell(10, 5)\n\nx = rand(10) # dummy data\nh = rand(5) # initial hidden state\n\nh, y = rnn(h, x)"
},
{
"location": "models/recurrence.html#Stateful-Models-1",
"page": "Recurrence",
"title": "Stateful Models",
"category": "section",
"text": "For the most part, we don't want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the Recur wrapper to do this.x = rand(10)\nh = rand(5)\n\nm = Flux.Recur(rnn, h)\n\ny = m(x)The Recur wrapper stores the state between runs in the m.state field.If you use the RNN(10, 5) constructor as opposed to RNNCell you'll see that it's simply a wrapped cell.julia> RNN(10, 5)\nRecur(RNNCell(Dense(15, 5)))"
},
{
"location": "models/recurrence.html#Sequences-1",
"page": "Recurrence",
"title": "Sequences",
"category": "section",
"text": "Often we want to work with sequences of inputs, rather than individual xs.seq = [rand(10) for i = 1:10]With Recur, applying our model to each element of a sequence is trivial:map(m, seq) # returns a list of 5-element vectorsTo make this a bit more convenient, Flux has the Seq type. This is just a list, but tagged so that we know it's meant to be used as a sequence of data points.seq = Seq([rand(10) for i = 1:10])\nm(seq) # returns a new Seq of length 10When we apply the model m to a seq, it gets mapped over every item in the sequence in order. This is just like the code above, but often more convenient.You can get this behaviour more generally with the Over wrapper.m = Over(Dense(10,5))\nm(seq) # returns a new Seq of length 10"
},
{
"location": "models/recurrence.html#Truncating-Gradients-1",
"page": "Recurrence",
"title": "Truncating Gradients",
"category": "section",
"text": "By default, calculating the gradients in a recurrent layer involves the entire history. For example, if we call the model on 100 inputs, calling back! will calculate the gradient for those 100 calls. If we then calculate another 10 inputs we have to calculate 110 gradients this accumulates and quickly becomes expensive.To avoid this we can truncate the gradient calculation, forgetting the history.truncate!(m)Calling truncate! wipes the slate clean, so we can call the model with more inputs without building up an expensive gradient computation."
},
{
"location": "models/layers.html#",
"page": "Layer Reference",
"title": "Layer Reference",
"category": "page",
"text": ""
},
{
"location": "models/layers.html#Flux.Chain",
"page": "Layer Reference",
"title": "Flux.Chain",
"category": "Type",
"text": "Chain(layers...)\n\nChain multiple layers / functions together, so that they are called in sequence on a given input.\n\nm = Chain(x -> x^2, x -> x+1)\nm(5) == 26\n\nm = Chain(Dense(10, 5), Dense(5, 2))\nx = rand(10)\nm(x) == m[2](m[1](x))\n\nChain also supports indexing and slicing, e.g. m[2] or m[1:end-1]. m[1:3](x) will calculate the output of the first three layers.\n\n\n\n"
},
{
"location": "models/layers.html#Flux.Dense",
"page": "Layer Reference",
"title": "Flux.Dense",
"category": "Type",
"text": "Dense(in::Integer, out::Integer, σ = identity)\n\nCreates a traditional Dense layer with parameters W and b.\n\ny = σ.(W * x .+ b)\n\nThe input x must be a vector of length in, or a batch of vectors represented as an in × N matrix. The out y will be a vector or batch of length in.\n\n\n\n"
},
{
"location": "models/layers.html#Model-Layers-1",
"page": "Layer Reference",
"title": "Model Layers",
"category": "section",
"text": "Chain\nDense"
},
{
"location": "training/optimisers.html#",
"page": "Optimisers",
"title": "Optimisers",
"category": "page",
"text": ""
},
{
"location": "training/optimisers.html#Optimisers-1",
"page": "Optimisers",
"title": "Optimisers",
"category": "section",
"text": "Consider a simple linear regression. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters W and b.W = param(rand(2, 5))\nb = param(rand(2))\n\npredict(x) = W*x .+ b\nloss(x, y) = sum((predict(x) .- y).^2)\n\nx, y = rand(5), rand(2) # Dummy data\nl = loss(x, y) # ~ 3\nback!(l)We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:using Flux.Tracker: data, grad\n\nfunction update()\n η = 0.1 # Learning Rate\n for p in (W, b)\n x, Δ = data(p), grad(p)\n x .-= η .* Δ # Apply the update\n Δ .= 0 # Clear the gradient\n end\nendIf we call update, the parameters W and b will change and our loss should go down.There are two pieces here: one is that we need a list of trainable parameters for the model ([W, b] in this case), and the other is the update step. In this case the update is simply gradient descent (x .-= η .* Δ), but we might choose to do something more advanced, like adding momentum.In this case, getting the variables is trivial, but you can imagine it'd be more of a pain with some complex stack of layers.m = Chain(\n Dense(10, 5, σ),\n Dense(5, 2), softmax)Instead of having to write [m[1].W, m[1].b, ...], Flux provides a params function params(m) that returns a list of all parameters in the model for you.For the update step, there's nothing whatsoever wrong with writing the loop above it'll work just fine but Flux provides various optimisers that make it more convenient.opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1\n\nopt()An optimiser takes a parameter list and returns a function that does the same thing as update above. We can pass either opt or update to our training loop, which will then run the optimiser after every mini-batch of data."
},
{
"location": "training/training.html#",
"page": "Training",
"title": "Training",
"category": "page",
"text": ""
},
{
"location": "training/training.html#Training-1",
"page": "Training",
"title": "Training",
"category": "section",
"text": "To actually train a model we need three things:A loss function, that evaluates how well a model is doing given some input data.\nA collection of data points that will be provided to the loss function.\nAn optimiser that will update the model parameters appropriately.With these we can call Flux.train!:Flux.train!(loss, data, opt)There are plenty of examples in the model zoo."
},
{
"location": "training/training.html#Loss-Functions-1",
"page": "Training",
"title": "Loss Functions",
"category": "section",
"text": "The loss that we defined in basics is completely valid for training. We can also define a loss in terms of some model:m = Chain(\n Dense(784, 32, σ),\n Dense(32, 10), softmax)\n\nloss(x, y) = Flux.mse(m(x), y)The loss will almost always be defined in terms of some cost function that measures the distance of the prediction m(x) from the target y. Flux has several of these built in, like mse for mean squared error or logloss for cross entropy loss, but you can calculate it however you want."
},
{
"location": "training/training.html#Callbacks-1",
"page": "Training",
"title": "Callbacks",
"category": "section",
"text": "train! takes an additional argument, cb, that's used for callbacks so that you can observe the training process. For example:train!(loss, data, opt, cb = () -> println(\"training\"))Callbacks are called for every batch of training data. You can slow this down using Flux.throttle(f, timeout) which prevents f from being called more than once every timeout seconds.A more typical callback might look like this:test_x, test_y = # ... create single batch of test data ...\nevalcb() = @show(loss(test_x, test_y))\n\nFlux.train!(loss, data, opt,\n cb = throttle(evalcb, 5))"
},
{
"location": "data/onehot.html#",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "page",
"text": ""
},
{
"location": "data/onehot.html#One-Hot-Encoding-1",
"page": "One-Hot Encoding",
"title": "One-Hot Encoding",
"category": "section",
"text": "It's common to encode categorical variables (like true, false or cat, dog) in \"one-of-k\" or \"one-hot\" form. Flux provides the onehot function to make this easy.julia> using Flux: onehot\n\njulia> onehot(:b, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n true\n false\n\njulia> onehot(:c, [:a, :b, :c])\n3-element Flux.OneHotVector:\n false\n false\n trueThe inverse is argmax (which can take a general probability distribution, as well as just booleans).julia> argmax(ans, [:a, :b, :c])\n:c\n\njulia> argmax([true, false, false], [:a, :b, :c])\n:a\n\njulia> argmax([0.3, 0.2, 0.5], [:a, :b, :c])\n:c"
},
{
"location": "data/onehot.html#Batches-1",
"page": "One-Hot Encoding",
"title": "Batches",
"category": "section",
"text": "onehotbatch creates a batch (matrix) of one-hot vectors, and argmax treats matrices as batches.julia> using Flux: onehotbatch\n\njulia> onehotbatch([:b, :a, :b], [:a, :b, :c])\n3×3 Flux.OneHotMatrix:\n false true false\n true false true\n false false false\n\njulia> onecold(ans, [:a, :b, :c])\n3-element Array{Symbol,1}:\n :b\n :a\n :bNote that these operations returned OneHotVector and OneHotMatrix rather than Arrays. OneHotVectors behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly.. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood."
},
{
"location": "contributing.html#",
"page": "Contributing & Help",
"title": "Contributing & Help",
"category": "page",
"text": ""
},
{
"location": "contributing.html#Contributing-and-Help-1",
"page": "Contributing & Help",
"title": "Contributing & Help",
"category": "section",
"text": "If you need help, please ask on the Julia forum, the slack (channel #machine-learning), or Flux's Gitter.Right now, the best way to help out is to try out the examples and report any issues or missing features as you find them. The second best way is to help us spread the word, perhaps by starring the repo.If you're interested in hacking on Flux, most of the code is pretty straightforward. Adding new layer definitions or cost functions is simple using the Flux DSL itself, and things like data utilities and training processes are all plain Julia code.If you get stuck or need anything, let us know!"
},
]}

1
v0.3.0/siteinfo.js Normal file
View File

@ -0,0 +1 @@
var DOCUMENTER_CURRENT_VERSION = "v0.3.0";

View File

@ -0,0 +1,30 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Optimisers · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href="optimisers.html">Optimisers</a><ul class="internal"></ul></li><li><a class="toctext" href="training.html">Training</a></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training 
Models</li><li><a href="optimisers.html">Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../models/basics.html">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">W = param(rand(2, 5))
b = param(rand(2))
predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)
x, y = rand(5), rand(2) # Dummy data
l = loss(x, y) # ~ 3
back!(l)</code></pre><p>We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here&#39;s one way to do that:</p><pre><code class="language-julia">using Flux.Tracker: data, grad
function update()
η = 0.1 # Learning Rate
for p in (W, b)
x, Δ = data(p), grad(p)
x .-= η .* Δ # Apply the update
Δ .= 0 # Clear the gradient
end
end</code></pre><p>If we call <code>update</code>, the parameters <code>W</code> and <code>b</code> will change and our loss should go down.</p><p>There are two pieces here: one is that we need a list of trainable parameters for the model (<code>[W, b]</code> in this case), and the other is the update step. In this case the update is simply gradient descent (<code>x .-= η .* Δ</code>), but we might choose to do something more advanced, like adding momentum.</p><p>In this case, getting the variables is trivial, but you can imagine it&#39;d be more of a pain with some complex stack of layers.</p><pre><code class="language-julia">m = Chain(
Dense(10, 5, σ),
Dense(5, 2), softmax)</code></pre><p>Instead of having to write <code>[m[1].W, m[1].b, ...]</code>, Flux provides a params function <code>params(m)</code> that returns a list of all parameters in the model for you.</p><p>For the update step, there&#39;s nothing whatsoever wrong with writing the loop above it&#39;ll work just fine but Flux provides various <em>optimisers</em> that make it more convenient.</p><pre><code class="language-julia">opt = SGD([W, b], 0.1) # Gradient descent with learning rate 0.1
opt()</code></pre><p>An optimiser takes a parameter list and returns a function that does the same thing as <code>update</code> above. We can pass either <code>opt</code> or <code>update</code> to our <a href="training/training.html">training loop</a>, which will then run the optimiser after every mini-batch of data.</p><footer><hr/><a class="previous" href="../models/layers.html"><span class="direction">Previous</span><span class="title">Layer Reference</span></a><a class="next" href="training.html"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>

View File

@ -0,0 +1,17 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Training · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" action="../search.html"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../index.html">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics.html">Basics</a></li><li><a class="toctext" href="../models/recurrence.html">Recurrence</a></li><li><a class="toctext" href="../models/layers.html">Layer Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="optimisers.html">Optimisers</a></li><li class="current"><a class="toctext" href="training.html">Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><span class="toctext">Data Munging</span><ul><li><a class="toctext" href="../data/onehot.html">One-Hot Encoding</a></li></ul></li><li><a 
class="toctext" href="../contributing.html">Contributing &amp; Help</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href="training.html">Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/tree/5f24d61ba3abff93cbd42d871d24b6d6f48ce7ae/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need three things:</p><ul><li><p>A <em>loss function</em>, that evaluates how well a model is doing given some input data.</p></li><li><p>A collection of data points that will be provided to the loss function.</p></li><li><p>An <a href="optimisers.html">optimiser</a> that will update the model parameters appropriately.</p></li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(loss, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" href="#Loss-Functions-1">Loss Functions</a></h2><p>The <code>loss</code> that we defined in <a href="../models/basics.html">basics</a> is completely valid for training. We can also define a loss in terms of some model:</p><pre><code class="language-julia">m = Chain(
Dense(784, 32, σ),
Dense(32, 10), softmax)
loss(x, y) = Flux.mse(m(x), y)</code></pre><p>The loss will almost always be defined in terms of some <em>cost function</em> that measures the distance of the prediction <code>m(x)</code> from the target <code>y</code>. Flux has several of these built in, like <code>mse</code> for mean squared error or <code>logloss</code> for cross entropy loss, but you can calculate it however you want.</p><h2><a class="nav-anchor" id="Callbacks-1" href="#Callbacks-1">Callbacks</a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that&#39;s used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(loss, data, opt, cb = () -&gt; println(&quot;training&quot;))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
evalcb() = @show(loss(test_x, test_y))
Flux.train!(loss, data, opt,
cb = throttle(evalcb, 5))</code></pre><footer><hr/><a class="previous" href="optimisers.html"><span class="direction">Previous</span><span class="title">Optimisers</span></a><a class="next" href="../data/onehot.html"><span class="direction">Next</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>

View File

@ -1,8 +1,10 @@
var DOC_VERSIONS = [
"stable",
"latest",
"release-0.3",
"release-0.2",
"release-0.1",
"v0.3.0",
"v0.2.1",
"v0.2.0",
"v0.1.1",