build based on f46b524
parent 04fc50b365
commit fae97cd62f
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->

<svg
   xmlns:dc="http://purl.org/dc/elements/1.1/"
   xmlns:cc="http://creativecommons.org/ns#"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
   xmlns:svg="http://www.w3.org/2000/svg"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   width="16.5mm"
   height="8.6603003mm"
   viewBox="0 0 58.464567 30.686103"
   id="svg2"
   version="1.1"
   inkscape:version="0.91 r13725"
   sodipodi:docname="arrow.svg">
  <defs
     id="defs4" />
  <sodipodi:namedview
     id="base"
     pagecolor="#ffffff"
     bordercolor="#666666"
     borderopacity="1.0"
     inkscape:pageopacity="0.0"
     inkscape:pageshadow="2"
     inkscape:zoom="11.2"
     inkscape:cx="14.209234"
     inkscape:cy="29.780479"
     inkscape:document-units="px"
     inkscape:current-layer="layer1"
     showgrid="false"
     inkscape:window-width="1920"
     inkscape:window-height="1053"
     inkscape:window-x="0"
     inkscape:window-y="27"
     inkscape:window-maximized="1" />
  <metadata
     id="metadata7">
    <rdf:RDF>
      <cc:Work
         rdf:about="">
        <dc:format>image/svg+xml</dc:format>
        <dc:type
           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
        <dc:title></dc:title>
      </cc:Work>
    </rdf:RDF>
  </metadata>
  <g
     inkscape:label="Layer 1"
     inkscape:groupmode="layer"
     id="layer1"
     transform="translate(0,-1021.6761)">
    <path
       style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
       d="m 0,1021.6761 35.433071,0 -17.716536,30.6861 z"
       id="path4140"
       inkscape:connector-curvature="0"
       sodipodi:nodetypes="cccc" />
  </g>
</svg>
@@ -0,0 +1,605 @@
/*
 * The default CSS style for Documenter.jl generated sites
 *
 * Heavily inspired by the Julia Sphinx theme
 *   https://github.com/JuliaLang/JuliaDoc
 * which extends the sphinx_rtd_theme
 *   https://github.com/snide/sphinx_rtd_theme
 *
 * Part of Documenter.jl
 *   https://github.com/JuliaDocs/Documenter.jl
 *
 * License: MIT
 */

/* fonts */
body, input {
    font-family: 'Lato', 'Helvetica Neue', Arial, sans-serif;
    font-size: 16px;
    color: #222;
    text-rendering: optimizeLegibility;
}

pre, code, kbd {
    font-family: 'Roboto Mono', Monaco, courier, monospace;
    font-size: 0.90em;
}

pre code {
    font-size: 1em;
}

a {
    color: #2980b9;
    text-decoration: none;
}

a:hover {
    color: #3091d1;
}

a:visited {
    color: #9b59b6;
}

body {
    line-height: 1.5;
}

h1 {
    font-size: 1.75em;
}

/* Unless the <h1> is the very first thing on the page (i.e. the second element
 * in the <article>, after the <header>), we add some additional styling to it
 * to make it stand out a bit more. This way we get a reasonable fallback if CSS3
 * selectors are not supported in the browser.
 */
article > h1:not(:nth-child(2)) {
    margin: 2.5em 0 0;
    padding-bottom: 0.30em;
    border-bottom: 1px solid #e5e5e5;
}
h2 {
    font-size: 1.50em;
    margin: 2.3em 0 0;
    padding-bottom: 0.25em;
    border-bottom: 1px solid #e5e5e5;
}
h3 {
    font-size: 1.25em;
    margin: 2.0em 0 0;
}
h4 { font-size: 1.15em; }
h5 { font-size: 1.10em; }
h6 { font-size: 1em; }

h4, h5, h6 {
    margin-top: 1.5em;
    margin-bottom: 1em;
}

img {
    max-width: 100%;
}

video {
    max-width: 100%;
}

table {
    border-collapse: collapse;
    margin: 1em 0;
}

th, td {
    border: 1px solid #e1e4e5;
    padding: 0.5em 1em;
}

th {
    border-bottom-width: 2px;
}

tr:nth-child(even) {
    background-color: #f3f6f6;
}

hr {
    border: 0;
    border-top: 1px solid #e5e5e5;
}

/* Inline code and code blocks */

code {
    padding: 0.1em;
    background-color: rgba(0,0,0,.04);
    border-radius: 3px;
}

pre {
    background-color: #f5f5f5;
    border: 1px solid #dddddd;
    border-radius: 3px;
    padding: 0.5em;
    overflow: auto;
}

pre code {
    padding: 0;
    background-color: initial;
}

kbd {
    font-size: 0.70em;
    display: inline-block;
    padding: 0.1em 0.5em 0.4em 0.5em;
    line-height: 1.0em;
    color: #444d56;
    vertical-align: middle;
    background-color: #fafbfc;
    border: solid 1px #c6cbd1;
    border-bottom-color: #959da5;
    border-radius: 3px;
    box-shadow: inset 0 -1px 0 #959da5;
}

/* Headers in admonitions and docstrings */
.admonition h1,
article section.docstring h1 {
    font-size: 1.25em;
}

.admonition h2,
article section.docstring h2 {
    font-size: 1.10em;
}

.admonition h3,
.admonition h4,
.admonition h5,
.admonition h6,
article section.docstring h3,
article section.docstring h4,
article section.docstring h5,
article section.docstring h6 {
    font-size: 1em;
}

/* Navigation */
nav.toc {
    position: fixed;
    top: 0;
    left: 0;
    bottom: 0;
    width: 20em;
    display: flex;
    flex-flow: column nowrap;
    overflow-y: auto;
    padding: 1em 0 0 0;
    background-color: #fcfcfc;
    box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
}

nav.toc .logo {
    margin: 0 auto;
    display: block;
    max-height: 6em;
    max-width: 18em;
}

nav.toc h1 {
    text-align: center;
    margin-top: .57em;
    margin-bottom: 0;
}

nav.toc select {
    display: block;
    height: 2em;
    flex-shrink: 0;
    padding: 0 1.6em 0 1em;
    min-width: 7em;
    max-width: 90%;
    max-width: calc(100% - 5em);
    margin: 0 auto;
    font-size: .83em;
    border: 1px solid #c9c9c9;
    border-radius: 1em;

    /* TODO: doesn't seem to be centered on Safari */
    text-align: center;
    text-align-last: center;

    appearance: none;
    -moz-appearance: none;
    -webkit-appearance: none;

    background: white url("arrow.svg");
    background-size: 1.155em;
    background-repeat: no-repeat;
    background-position: right;
}
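
/* Note: the duplicated max-width above is a fallback pattern, not a typo.
 * Browsers without calc() support keep the plain 90% value; the rest override
 * it with calc(100% - 5em). The url("arrow.svg") background is the dropdown
 * arrow shipped alongside this stylesheet in the assets directory.
 */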

nav.toc select:hover {
    border: 1px solid #a0a0a0;
}

nav.toc select option {
    text-align: center;
}

nav.toc input {
    display: block;
    height: 2em;
    width: 90%;
    width: calc(100% - 5em);
    margin: 1.2em auto;
    padding: 0 1em;
    border: 1px solid #c9c9c9;
    border-radius: 1em;
    font-size: .83em;
}

nav.toc > ul * {
    margin: 0;
}

nav.toc > ul {
    min-height: 2em;
    overflow-y: auto;
    margin: 0;
}

nav.toc > ul > li:last-child {
    padding-bottom: 1em;
}

nav.toc ul {
    color: #404040;
    padding: 0;
    list-style: none;
}

nav.toc ul .toctext {
    color: inherit;
    display: block;
}

nav.toc ul a:hover {
    color: #fcfcfc;
    background-color: #4e4a4a;
}

nav.toc ul.internal a {
    color: inherit;
    display: block;
}

nav.toc ul.internal a:hover {
    background-color: #d6d6d6;
}

nav.toc ul.internal {
    background-color: #e3e3e3;
    box-shadow: inset -14px 0px 5px -12px rgb(210,210,210);
    list-style: none;
}

nav.toc ul.internal li.toplevel {
    border-top: 1px solid #909090;
    font-weight: bold;
}

nav.toc ul.internal li.toplevel:first-child {
    border-top: none;
}

nav.toc .toctext {
    padding-top: 0.3em;
    padding-bottom: 0.3em;
    padding-right: 1em;
}

nav.toc ul .toctext {
    padding-left: 1em;
}

nav.toc ul ul .toctext {
    padding-left: 2em;
}

nav.toc ul ul ul .toctext {
    padding-left: 3em;
}

nav.toc li.current > .toctext {
    border-top: 1px solid #c9c9c9;
    border-bottom: 1px solid #c9c9c9;
    color: #404040;
    font-weight: bold;
    background-color: white;
}

nav.toc ul::-webkit-scrollbar {
    width: .4em;
    background: none;
}

nav.toc ul::-webkit-scrollbar-thumb {
    border-radius: 5px;
    background: #c9c9c9;
}

nav.toc ul::-webkit-scrollbar-thumb:hover {
    border-radius: 5px;
    background: #aaaaaa;
}

article {
    margin-left: 20em;
    min-width: 20em;
    max-width: 48em;
    padding: 2em;
}

article > header {}

article > header div#topbar {
    display: none;
}

article > header nav ul {
    display: inline-block;
    list-style: none;
    margin: 0;
    padding: 0;
}

article > header nav li {
    display: inline-block;
    padding-right: 0.2em;
}

article > header nav li:before {
    content: "»";
    padding-right: 0.2em;
}

article > header .edit-page {
    float: right;
}

article > footer {}

article > footer a.prev {
    float: left;
}
article > footer a.next {
    float: right;
}

article > footer a .direction:after {
    content: ": ";
}

article hr {
    margin: 1em 0;
}

article section.docstring {
    border: 1px solid #ddd;
    margin: 0.5em 0;
    padding: 0.5em;
    border-radius: 3px;
}

article section.docstring .docstring-header {
    margin-bottom: 1em;
}

article section.docstring .docstring-binding {
    color: #333;
    font-weight: bold;
}

article section.docstring .docstring-category {
    font-style: italic;
}

article section.docstring a.source-link {
    display: block;
    font-weight: bold;
}

.nav-anchor,
.nav-anchor:hover,
.nav-anchor:visited {
    color: #333;
}

/*
 * Admonitions
 *
 * Colors (title, body)
 * warning: #f0b37e #ffedcc (orange)
 * note:    #6ab0de #e7f2fa (blue)
 * tip:     #1abc9c #dbfaf4 (green)
 */
.admonition {
    border-radius: 3px;
    background-color: #eeeeee;
    margin: 1em 0;
}

.admonition-title {
    border-radius: 3px 3px 0 0;
    background-color: #9b9b9b;
    padding: 0.15em 0.5em;
}

.admonition-text {
    padding: 0.5em;
}

.admonition-text > :first-child {
    margin-top: 0;
}

.admonition-text > :last-child {
    margin-bottom: 0;
}

.admonition > .admonition-title:before {
    font-family: "FontAwesome";
    margin-right: 5px;
    content: "\f06a";
}

.admonition.warning > .admonition-title {
    background-color: #f0b37e;
}

.admonition.warning {
    background-color: #ffedcc;
}

.admonition.note > .admonition-title {
    background-color: #6ab0de;
}

.admonition.note {
    background-color: #e7f2fa;
}

.admonition.tip > .admonition-title {
    background-color: #1abc9c;
}

.admonition.tip {
    background-color: #dbfaf4;
}


/* footnotes */
.footnote {
    padding-left: 0.8em;
    border-left: 2px solid #ccc;
}

/* Search page */
#search-results .category {
    font-size: smaller;
}

/* Overriding the <code> block style of highlight.js.
 * We have to override the padding and the background-color, since we style this
 * part ourselves. Specifically, we style the <pre> surrounding the <code>, while
 * highlight.js applies the .hljs style directly to the <code> tag.
 */
.hljs {
    background-color: transparent;
    padding: 0;
}

@media only screen and (max-width: 768px) {
    nav.toc {
        position: fixed;
        width: 16em;
        left: -16em;
        -webkit-overflow-scrolling: touch;
        -webkit-transition-property: left; /* Safari */
        -webkit-transition-duration: 0.3s; /* Safari */
        transition-property: left;
        transition-duration: 0.3s;
        -webkit-transition-timing-function: ease-out; /* Safari */
        transition-timing-function: ease-out;
        z-index: 2;
        box-shadow: 5px 0px 5px 0px rgb(210,210,210);
    }

    nav.toc.show {
        left: 0;
    }

    article {
        margin-left: 0;
        padding: 3em 0.9em 0 0.9em; /* top right bottom left */
        overflow-wrap: break-word;
    }

    article > header {
        position: fixed;
        left: 0;
        z-index: 1;
    }

    article > header nav, hr {
        display: none;
    }

    article > header div#topbar {
        display: block; /* is mobile */
        position: fixed;
        width: 100%;
        height: 1.5em;
        padding-top: 1em;
        padding-bottom: 1em;
        background-color: #fcfcfc;
        box-shadow: 0 1px 3px rgba(0,0,0,.26);
        top: 0;
        -webkit-transition-property: top; /* Safari */
        -webkit-transition-duration: 0.3s; /* Safari */
        transition-property: top;
        transition-duration: 0.3s;
    }

    article > header div#topbar.headroom--unpinned.headroom--not-top.headroom--not-bottom {
        top: -4em;
        -webkit-transition-property: top; /* Safari */
        -webkit-transition-duration: 0.7s; /* Safari */
        transition-property: top;
        transition-duration: 0.7s;
    }
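
    /* Note: the headroom--* classes matched above are not set by this
     * stylesheet; headroom.js (initialised in documenter.js) toggles them on
     * the topbar as the page scrolls, which is what slides the bar in and
     * out of view.
     */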

    article > header div#topbar span {
        width: 80%;
        height: 1.5em;
        margin-top: -0.1em;
        margin-left: 0.9em;
        font-size: 1.2em;
        overflow: hidden;
    }

    article > header div#topbar a.fa-bars {
        float: right;
        padding: 0.6em;
        margin-top: -0.6em;
        margin-right: 0.3em;
        font-size: 1.5em;
    }

    article > header div#topbar a.fa-bars:visited {
        color: #3091d1;
    }

    article table {
        overflow-x: auto;
        display: block;
    }

    article div.MathJax_Display {
        overflow: scroll;
    }

    article span.MathJax {
        overflow: hidden;
    }
}

@media only screen and (max-width: 320px) {
    body {
        font-size: 15px;
    }
}
@@ -0,0 +1,132 @@
/*
 * Part of Documenter.jl
 *   https://github.com/JuliaDocs/Documenter.jl
 *
 * License: MIT
 */

requirejs.config({
    paths: {
        'jquery': 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min',
        'jqueryui': 'https://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.12.0/jquery-ui.min',
        'headroom': 'https://cdnjs.cloudflare.com/ajax/libs/headroom/0.9.3/headroom.min',
        'mathjax': 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS_HTML',
        'highlight': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min',
        'highlight-julia': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia.min',
        'highlight-julia-repl': 'https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/languages/julia-repl.min',
    },
    shim: {
        'mathjax' : {
            exports: "MathJax"
        },
        'highlight-julia': ['highlight'],
        'highlight-julia-repl': ['highlight'],
    }
});

// Load MathJax
require(['mathjax'], function(MathJax) {
    MathJax.Hub.Config({
        "tex2jax": {
            inlineMath: [['$','$'], ['\\(','\\)']],
            processEscapes: true
        }
    });
    MathJax.Hub.Config({
        config: ["MMLorHTML.js"],
        jax: [
            "input/TeX",
            "output/HTML-CSS",
            "output/NativeMML"
        ],
        extensions: [
            "MathMenu.js",
            "MathZoom.js",
            "TeX/AMSmath.js",
            "TeX/AMSsymbols.js",
            "TeX/autobold.js",
            "TeX/autoload-all.js"
        ]
    });
    MathJax.Hub.Config({
        TeX: { equationNumbers: { autoNumber: "AMS" } }
    });
})

require(['jquery', 'highlight', 'highlight-julia', 'highlight-julia-repl'], function($, hljs) {
    $(document).ready(function() {
        hljs.initHighlighting();
    })

})
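
// Note on the shim block above: MathJax and the highlight.js language packs
// are not AMD modules, so RequireJS needs hints. `exports: "MathJax"` tells
// it to hand back the global MathJax object once the script loads, and the
// two julia entries declare a dependency on the core 'highlight' module so
// they are always evaluated after it.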

// update the version selector with info from the siteinfo.js and ../versions.js files
require(['jquery'], function($) {
    $(document).ready(function() {
        var version_selector = $("#version-selector");

        // add the current version to the selector based on siteinfo.js, but only if the selector is empty
        if (typeof DOCUMENTER_CURRENT_VERSION !== 'undefined' && $('#version-selector > option').length == 0) {
            var option = $("<option value='#' selected='selected'>" + DOCUMENTER_CURRENT_VERSION + "</option>");
            version_selector.append(option);
        }

        if (typeof DOC_VERSIONS !== 'undefined') {
            var existing_versions = $('#version-selector > option');
            var existing_versions_texts = existing_versions.map(function(i,x){return x.text});
            DOC_VERSIONS.forEach(function(each) {
                var version_url = documenterBaseURL + "/../" + each;
                var existing_id = $.inArray(each, existing_versions_texts);
                // if not already in the version selector, add it as a new option,
                // otherwise update the old option with the URL and enable it
                if (existing_id == -1) {
                    var option = $("<option value='" + version_url + "'>" + each + "</option>");
                    version_selector.append(option);
                } else {
                    var option = existing_versions[existing_id];
                    option.value = version_url;
                    option.disabled = false;
                }
            });
        }

        // only show the version selector if the selector has been populated
        if ($('#version-selector > option').length > 0) {
            version_selector.css("visibility", "visible");
        }

        // Scroll the navigation bar to the currently selected menu item
        $("nav.toc > ul").get(0).scrollTop = $(".current").get(0).offsetTop - $("nav.toc > ul").get(0).offsetTop;
    })

})
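
// Worked example of the URL arithmetic above, assuming a page that sets
// documenterBaseURL = ".." and a DOC_VERSIONS entry "v0.9.0": version_url
// becomes "../../v0.9.0", i.e. a sibling of this build's root directory,
// which is where the other deployed versions of the docs live.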

// mobile
require(['jquery', 'headroom'], function($, Headroom) {
    $(document).ready(function() {
        var navtoc = $("nav.toc");
        $("nav.toc li.current a.toctext").click(function() {
            navtoc.toggleClass('show');
        });
        $("article > header div#topbar a.fa-bars").click(function(ev) {
            ev.preventDefault();
            navtoc.toggleClass('show');
            if (navtoc.hasClass('show')) {
                var title = $("article > header div#topbar span").text();
                $("nav.toc ul li a:contains('" + title + "')").focus();
            }
        });
        $("article#docs").bind('click', function(ev) {
            if ($(ev.target).is('div#topbar a.fa-bars')) {
                return;
            }
            if (navtoc.hasClass('show')) {
                navtoc.removeClass('show');
            }
        });
        if ($("article > header div#topbar").css('display') == 'block') {
            var headroom = new Headroom(document.querySelector("article > header div#topbar"), {"tolerance": {"up": 10, "down": 10}});
            headroom.init();
        }
    })
})
@@ -0,0 +1,113 @@
@import url('https://fonts.googleapis.com/css?family=Lato:400,400i');

body {
    font-family: Lato, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
}

nav.toc {
    padding-top: 0;
    background: rgb(240, 240, 240);
    line-height: 2em;
    cursor: default;
    user-select: none;
}

h1+h2 {
    margin-top: 0;
}

/* Green banner in ToC */
nav.toc > h1 {
    margin-top: 0;
    padding-top: 0.4em;
    padding-bottom: 0.5em;
    border-bottom: 5px solid white;
    box-shadow: 0px -2px 5px rgb(60,60,60);
    margin-bottom: 0.5em;
    background: rgb(60, 150, 60);

    font-style: italic;
    font-weight: normal;
    font-size: 50pt;
    text-transform: lowercase;
    text-shadow: 2px 2px 5px rgba(0,0,0,0.2);
    color: white;
}

/* Reduce ToC font size */
.toctext {
    font-size: 10pt;
}

/* Fade out non-clickable ToC headers */
nav.toc ul span.toctext {
    color: rgb(180, 180, 180);
}

nav.toc ul .toctext {
    color: rgb(100, 100, 100);
}

nav.toc ul a.toctext:hover {
    color: inherit;
    background: rgb(220, 220, 220);
    cursor: default;
}

nav.toc li.current > .toctext {
    background: linear-gradient(90deg, rgb(245,245,245) 0%, white 90%);
    font-weight: normal;
}

nav.toc ul.internal li.toplevel {
    font-weight: normal;
}

/* Content */

article { max-width: none; }

article > p, article > ul {
    max-width: 45em;
}

/* Links */
a, a:visited { color: rgb(0, 120, 0); }
article p a { border-bottom: 1px solid rgb(200, 230, 200); }
a:hover, a:visited:hover { color: rgb(0, 80, 0); }

/* Article Links */
article p a { border-bottom: 1px solid rgb(200, 230, 200); }
article p a:hover, article a:visited:hover { color: rgb(0, 120, 0); }
article p a:hover { border-bottom: 1px solid rgb(150, 200, 150); }

/* Docstrings */
article section.docstring {
    padding: 0.5em 0;
    border-left: none;
    border-right: none;
    border-bottom: none;
}

/* Code */

article pre, article p > code {
    background: rgb(245, 250, 245);
}

article pre {
    border: none;
    max-width: none;
    padding: 1em;
    border-radius: 10px 0px 0px 10px;
    margin-left: -1em;
    margin-right: -2em;
}

.hljs-comment {
    font-style: italic;
}

.hljs-number {
    color: rgb(0, 150, 150);
}
@@ -0,0 +1,250 @@
/*
 * Part of Documenter.jl
 *   https://github.com/JuliaDocs/Documenter.jl
 *
 * License: MIT
 */

// parseUri 1.2.2
// (c) Steven Levithan <stevenlevithan.com>
// MIT License
function parseUri (str) {
    var o   = parseUri.options,
        m   = o.parser[o.strictMode ? "strict" : "loose"].exec(str),
        uri = {},
        i   = 14;

    while (i--) uri[o.key[i]] = m[i] || "";

    uri[o.q.name] = {};
    uri[o.key[12]].replace(o.q.parser, function ($0, $1, $2) {
        if ($1) uri[o.q.name][$1] = $2;
    });

    return uri;
};
parseUri.options = {
    strictMode: false,
    key: ["source","protocol","authority","userInfo","user","password","host","port","relative","path","directory","file","query","anchor"],
    q: {
        name: "queryKey",
        parser: /(?:^|&)([^&=]*)=?([^&]*)/g
    },
    parser: {
        strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
        loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
    }
};
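
// For instance (illustrative values only):
//   parseUri("https://example.org/dev/search/?q=gpu").queryKey.q  // "gpu"
// This is how the handler at the bottom of this file recovers the ?q=
// search term from window.location.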

requirejs.config({
    paths: {
        'jquery': 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min',
        'lunr': 'https://cdnjs.cloudflare.com/ajax/libs/lunr.js/2.3.5/lunr.min',
        'lodash': 'https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.11/lodash.min',
    }
});

var currentScript = document.currentScript;

require(["jquery", "lunr", "lodash"], function($, lunr, _) {
    $("#search-form").submit(function(e) {
        e.preventDefault()
    })

    // list below is the lunr 2.1.3 list minus the intersect with names(Base)
    // (all, any, get, in, is, which) and (do, else, for, let, where, while, with)
    // ideally we'd just filter the original list but it's not available as a variable
    lunr.stopWordFilter = lunr.generateStopWordFilter([
        'a',
        'able',
        'about',
        'across',
        'after',
        'almost',
        'also',
        'am',
        'among',
        'an',
        'and',
        'are',
        'as',
        'at',
        'be',
        'because',
        'been',
        'but',
        'by',
        'can',
        'cannot',
        'could',
        'dear',
        'did',
        'does',
        'either',
        'ever',
        'every',
        'from',
        'got',
        'had',
        'has',
        'have',
        'he',
        'her',
        'hers',
        'him',
        'his',
        'how',
        'however',
        'i',
        'if',
        'into',
        'it',
        'its',
        'just',
        'least',
        'like',
        'likely',
        'may',
        'me',
        'might',
        'most',
        'must',
        'my',
        'neither',
        'no',
        'nor',
        'not',
        'of',
        'off',
        'often',
        'on',
        'only',
        'or',
        'other',
        'our',
        'own',
        'rather',
        'said',
        'say',
        'says',
        'she',
        'should',
        'since',
        'so',
        'some',
        'than',
        'that',
        'the',
        'their',
        'them',
        'then',
        'there',
        'these',
        'they',
        'this',
        'tis',
        'to',
        'too',
        'twas',
        'us',
        'wants',
        'was',
        'we',
        'were',
        'what',
        'when',
        'who',
        'whom',
        'why',
        'will',
        'would',
        'yet',
        'you',
        'your'
    ])

    // add . as a separator, because otherwise "title": "Documenter.Anchors.add!"
    // would not find anything if searching for "add!", only for the entire qualification
    lunr.tokenizer.separator = /[\s\-\.]+/

    // custom trimmer that doesn't strip @ and !, which are used in julia macro and function names
    lunr.trimmer = function (token) {
        return token.update(function (s) {
            return s.replace(/^[^a-zA-Z0-9@!]+/, '').replace(/[^a-zA-Z0-9@!]+$/, '')
        })
    }

    lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'juliaStopWordFilter')
    lunr.Pipeline.registerFunction(lunr.trimmer, 'juliaTrimmer')
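
    // For instance, with the separator and trimmer above, the docstring title
    // "Documenter.Anchors.add!" tokenizes to "documenter", "anchors" and
    // "add!" (lunr lowercases tokens), so a search for "add!" can match it.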

    var index = lunr(function () {
        this.ref('location')
        this.field('title')
        this.field('text')
        documenterSearchIndex['docs'].forEach(function(e) {
            this.add(e)
        }, this)
    })
    var store = {}

    documenterSearchIndex['docs'].forEach(function(e) {
        store[e.location] = {title: e.title, category: e.category}
    })
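
    // Each lunr result only carries its ref (the page location used above);
    // the store map recovers the human-readable title and category for the
    // result list rendered below.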

    $(function(){
        function update_search(querystring) {
            tokens = lunr.tokenizer(querystring)
            results = index.query(function (q) {
                tokens.forEach(function (t) {
                    q.term(t.toString(), {
                        fields: ["title"],
                        boost: 100,
                        usePipeline: false,
                        editDistance: 0,
                        wildcard: lunr.Query.wildcard.NONE
                    })
                    q.term(t.toString(), {
                        fields: ["title"],
                        boost: 10,
                        usePipeline: false,
                        editDistance: 2,
                        wildcard: lunr.Query.wildcard.NONE
                    })
                    q.term(t.toString(), {
                        fields: ["text"],
                        boost: 1,
                        usePipeline: true,
                        editDistance: 0,
                        wildcard: lunr.Query.wildcard.NONE
                    })
                })
            })
            $('#search-info').text("Number of results: " + results.length)
            $('#search-results').empty()
            results.forEach(function(result) {
                data = store[result.ref]
                link = $('<a>')
                link.text(data.title)
                link.attr('href', documenterBaseURL+'/'+result.ref)
                cat = $('<span class="category">('+data.category+')</span>')
                li = $('<li>').append(link).append(" ").append(cat)
                $('#search-results').append(li)
            })
        }

        function update_search_box() {
            querystring = $('#search-query').val()
            update_search(querystring)
        }

        $('#search-query').keyup(_.debounce(update_search_box, 250))
        $('#search-query').change(update_search_box)

        search_query_uri = parseUri(window.location).queryKey["q"]
        if(search_query_uri !== undefined) {
            search_query = decodeURIComponent(search_query_uri.replace(/\+/g, '%20'))
            $("#search-query").val(search_query)
        }
        update_search_box();
    })
})
@@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Community · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics/">Basics</a></li><li><a class="toctext" href="../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu/">GPU Support</a></li><li><a class="toctext" href="../saving/">Saving & Loading</a></li><li><a class="toctext" href="../performance/">Performance Tips</a></li><li class="current"><a class="toctext" href>Community</a><ul class="internal"></ul></li></ul></nav><article id="docs"><header><nav><ul><li><a href>Community</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/community.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Community</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Community-1" href="#Community-1">Community</a></h1><p>All Flux users are welcome to join our community on the <a href="https://discourse.julialang.org/">Julia forum</a>, or the <a href="https://discourse.julialang.org/t/announcing-a-julia-slack/4866">slack</a> (channel #machine-learning). If you have questions or issues we'll try to help you out.</p><p>If you're interested in hacking on Flux, the <a href="https://github.com/FluxML/Flux.jl">source code</a> is open and easy to understand – it's all just the same Julia code you work with normally. You might be interested in our <a href="https://github.com/FluxML/Flux.jl/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">intro issues</a> to get started.</p><footer><hr/><a class="previous" href="../performance/"><span class="direction">Previous</span><span class="title">Performance Tips</span></a></footer></article></body></html>
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>One-Hot Encoding · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../../models/basics/">Basics</a></li><li><a class="toctext" href="../../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../../training/training/">Training</a></li></ul></li><li class="current"><a class="toctext" href>One-Hot Encoding</a><ul class="internal"><li><a class="toctext" href="#Batches-1">Batches</a></li></ul></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href>One-Hot Encoding</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/data/onehot.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>One-Hot Encoding</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="One-Hot-Encoding-1" href="#One-Hot-Encoding-1">One-Hot Encoding</a></h1><p>It's common to encode categorical variables (like <code>true</code>, <code>false</code> or <code>cat</code>, <code>dog</code>) in "one-of-k" or <a href="https://en.wikipedia.org/wiki/One-hot">"one-hot"</a> form. Flux provides the <code>onehot</code> function to make this easy.</p><pre><code class="language-none">julia> using Flux: onehot, onecold

julia> onehot(:b, [:a, :b, :c])
3-element Flux.OneHotVector:
 false
  true
 false

julia> onehot(:c, [:a, :b, :c])
3-element Flux.OneHotVector:
 false
 false
  true</code></pre><p>The inverse is <code>onecold</code> (which can take a general probability distribution, as well as just booleans).</p><pre><code class="language-julia">julia> onecold(ans, [:a, :b, :c])
:c

julia> onecold([true, false, false], [:a, :b, :c])
:a

julia> onecold([0.3, 0.2, 0.5], [:a, :b, :c])
:c</code></pre><h2><a class="nav-anchor" id="Batches-1" href="#Batches-1">Batches</a></h2><p><code>onehotbatch</code> creates a batch (matrix) of one-hot vectors, and <code>onecold</code> treats matrices as batches.</p><pre><code class="language-julia">julia> using Flux: onehotbatch

julia> onehotbatch([:b, :a, :b], [:a, :b, :c])
3×3 Flux.OneHotMatrix:
 false   true  false
  true  false   true
 false  false  false

julia> onecold(ans, [:a, :b, :c])
3-element Array{Symbol,1}:
 :b
 :a
 :b</code></pre><p>Note that these operations returned <code>OneHotVector</code> and <code>OneHotMatrix</code> rather than <code>Array</code>s. <code>OneHotVector</code>s behave like normal vectors but avoid any unnecessary cost compared to using an integer index directly. For example, multiplying a matrix with a one-hot vector simply slices out the relevant row of the matrix under the hood.</p><footer><hr/><a class="previous" href="../../training/training/"><span class="direction">Previous</span><span class="title">Training</span></a><a class="next" href="../../gpu/"><span class="direction">Next</span><span class="title">GPU Support</span></a></footer></article></body></html>
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>GPU Support · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics/">Basics</a></li><li><a class="toctext" href="../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot/">One-Hot Encoding</a></li><li class="current"><a class="toctext" href>GPU Support</a><ul class="internal"><li><a class="toctext" href="#GPU-Usage-1">GPU Usage</a></li></ul></li><li><a class="toctext" href="../saving/">Saving & Loading</a></li><li><a class="toctext" href="../performance/">Performance Tips</a></li><li><a class="toctext" href="../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href>GPU Support</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/gpu.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>GPU Support</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="GPU-Support-1" href="#GPU-Support-1">GPU Support</a></h1><p>NVIDIA GPU support should work out of the box on systems with CUDA and CUDNN installed. For more details see the <a href="https://github.com/JuliaGPU/CuArrays.jl">CuArrays</a> readme.</p><h2><a class="nav-anchor" id="GPU-Usage-1" href="#GPU-Usage-1">GPU Usage</a></h2><p>Support for array operations on other hardware backends, like GPUs, is provided by external packages like <a href="https://github.com/JuliaGPU/CuArrays.jl">CuArrays</a>. Flux is agnostic to array types, so we simply need to move model weights and data to the GPU and Flux will handle it.</p><p>For example, we can use <code>CuArrays</code> (with the <code>cu</code> converter) to run our <a href="../models/basics/">basic example</a> on an NVIDIA GPU.</p><p>(Note that you need to have CUDA available to use CuArrays – please see the <a href="https://github.com/JuliaGPU/CuArrays.jl">CuArrays.jl</a> instructions for more details.)</p><pre><code class="language-julia">using CuArrays

W = cu(rand(2, 5)) # a 2×5 CuArray
b = cu(rand(2))

predict(x) = W*x .+ b
loss(x, y) = sum((predict(x) .- y).^2)

x, y = cu(rand(5)), cu(rand(2)) # Dummy data
loss(x, y) # ~ 3</code></pre><p>Note that we convert both the parameters (<code>W</code>, <code>b</code>) and the data set (<code>x</code>, <code>y</code>) to cuda arrays. Taking derivatives and training works exactly as before.</p><p>If you define a structured model, like a <code>Dense</code> layer or <code>Chain</code>, you just need to convert the internal parameters. Flux provides <code>fmap</code>, which allows you to alter all parameters of a model at once.</p><pre><code class="language-julia">d = Dense(10, 5, σ)
d = fmap(cu, d)
d.W # Tracked CuArray
d(cu(rand(10))) # CuArray output

m = Chain(Dense(10, 5, σ), Dense(5, 2), softmax)
m = fmap(cu, m)
m(cu(rand(10)))</code></pre><p>As a convenience, Flux provides the <code>gpu</code> function to convert models and data to the GPU if one is available. By default, it'll do nothing, but loading <code>CuArrays</code> will cause it to move data to the GPU instead.</p><pre><code class="language-julia">julia> using Flux, CuArrays

julia> m = Dense(10,5) |> gpu
Dense(10, 5)

julia> x = rand(10) |> gpu
10-element CuArray{Float32,1}:
 0.800225
 ⋮
 0.511655

julia> m(x)
Tracked 5-element CuArray{Float32,1}:
 -0.30535
  ⋮
 -0.618002</code></pre><p>The analogue <code>cpu</code> is also available for moving models and data back off of the GPU.</p><pre><code class="language-julia">julia> x = rand(10) |> gpu
10-element CuArray{Float32,1}:
 0.235164
 ⋮
 0.192538

julia> x |> cpu
10-element Array{Float32,1}:
 0.235164
 ⋮
 0.192538</code></pre><footer><hr/><a class="previous" href="../data/onehot/"><span class="direction">Previous</span><span class="title">One-Hot Encoding</span></a><a class="next" href="../saving/"><span class="direction">Next</span><span class="title">Saving & Loading</span></a></footer></article></body></html>
File diff suppressed because one or more lines are too long
@@ -0,0 +1,113 @@
<!DOCTYPE html>
|
||||
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Basics · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-36890222-9', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li class="current"><a class="toctext" href>Basics</a><ul class="internal"><li><a class="toctext" href="#Taking-Gradients-1">Taking Gradients</a></li><li><a class="toctext" href="#Simple-Models-1">Simple Models</a></li><li><a class="toctext" href="#Building-Layers-1">Building Layers</a></li><li><a class="toctext" href="#Stacking-It-Up-1">Stacking It Up</a></li><li><a class="toctext" href="#Layer-helpers-1">Layer helpers</a></li></ul></li><li><a class="toctext" href="../recurrence/">Recurrence</a></li><li><a class="toctext" href="../regularisation/">Regularisation</a></li><li><a class="toctext" href="../layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href>Basics</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/basics.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Basics</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Model-Building-Basics-1" href="#Model-Building-Basics-1">Model-Building Basics</a></h1><h2><a class="nav-anchor" id="Taking-Gradients-1" href="#Taking-Gradients-1">Taking Gradients</a></h2><p>Flux's core feature is taking gradients of Julia code. The <code>gradient</code> function takes another Julia function <code>f</code> and a set of arguments, and returns the gradient with respect to each argument. (It's a good idea to try pasting these examples in the Julia terminal.)</p><pre><code class="language-julia-repl">julia> using Flux
|
||||
|
||||
julia> f(x) = 3x^2 + 2x + 1;
|
||||
|
||||
julia> df(x) = gradient(f, x)[1]; # df/dx = 6x + 2
|
||||
|
||||
julia> df(2)
|
||||
14
|
||||
|
||||
julia> d2f(x) = gradient(df, x)[1]; # d²f/dx² = 6
|
||||
|
||||
julia> d2f(2)
|
||||
6</code></pre><p>When a function has many parameters, we can get gradients of each one at the same time:</p><pre><code class="language-julia-repl">julia> f(x, y) = sum((x .- y).^2);
|
||||
|
||||
julia> gradient(f, [2, 1], [2, 0])
|
||||
([0, 2], [0, -2])</code></pre><p>But machine learning models can have <em>hundreds</em> of parameters! To handle this, Flux lets you work with collections of parameters, via <code>params</code>. You can get the gradient of all parameters used in a program without explicitly passing them in.</p><pre><code class="language-julia-repl">julia> using Flux
|
||||
|
||||
julia> x = [2, 1];
|
||||
|
||||
julia> y = [2, 0];
|
||||
|
||||
julia> gs = gradient(params(x, y)) do
|
||||
f(x, y)
|
||||
end
|
||||
Grads(...)
|
||||
|
||||
julia> gs[x]
|
||||
2-element Array{Int64,1}:
|
||||
0
|
||||
2
|
||||
|
||||
julia> gs[y]
|
||||
2-element Array{Int64,1}:
|
||||
0
|
||||
-2</code></pre><p>Here, <code>gradient</code> takes a zero-argument function; no arguments are necessary because the <code>params</code> tell it what to differentiate.</p><p>This will come in really handy when dealing with big, complicated models. For now, though, let's start with something simple.</p><h2><a class="nav-anchor" id="Simple-Models-1" href="#Simple-Models-1">Simple Models</a></h2><p>Consider a simple linear regression, which tries to predict an output array <code>y</code> from an input <code>x</code>.</p><pre><code class="language-julia">W = rand(2, 5)
|
||||
b = rand(2)
|
||||
|
||||
predict(x) = W*x .+ b
|
||||
|
||||
function loss(x, y)
|
||||
ŷ = predict(x)
|
||||
sum((y .- ŷ).^2)
|
||||
end
|
||||
|
||||
x, y = rand(5), rand(2) # Dummy data
|
||||
loss(x, y) # ~ 3</code></pre><p>To improve the prediction we can take the gradients of <code>W</code> and <code>b</code> with respect to the loss and perform gradient descent.</p><pre><code class="language-julia">using Flux
|
||||
|
||||
gs = gradient(() -> loss(x, y), params(W, b))</code></pre><p>Now that we have gradients, we can pull them out and update <code>W</code> to train the model.</p><pre><code class="language-julia">W̄ = gs[W]
|
||||
|
||||
W .-= 0.1 .* W̄
|
||||
|
||||
loss(x, y) # ~ 2.5</code></pre><p>The loss has decreased a little, meaning that our prediction <code>x</code> is closer to the target <code>y</code>. If we have some data we can already try <a href="../../training/training/">training the model</a>.</p><p>All deep learning in Flux, however complex, is a simple generalisation of this example. Of course, models can <em>look</em> very different – they might have millions of parameters or complex control flow. Let's see how Flux handles more complex models.</p><h2><a class="nav-anchor" id="Building-Layers-1" href="#Building-Layers-1">Building Layers</a></h2><p>It's common to create more complex models than the linear regression above. For example, we might want to have two linear layers with a nonlinearity like <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> (<code>σ</code>) in between them. In the above style we could write this as:</p><pre><code class="language-julia">using Flux
|
||||
|
||||
W1 = rand(3, 5)
|
||||
b1 = rand(3)
|
||||
layer1(x) = W1 * x .+ b1
|
||||
|
||||
W2 = rand(2, 3)
|
||||
b2 = rand(2)
|
||||
layer2(x) = W2 * x .+ b2
|
||||
|
||||
model(x) = layer2(σ.(layer1(x)))
|
||||
|
||||
model(rand(5)) # => 2-element vector</code></pre><p>This works but is fairly unwieldy, with a lot of repetition – especially as we add more layers. One way to factor this out is to create a function that returns linear layers.</p><pre><code class="language-julia">function linear(in, out)
|
||||
W = randn(out, in)
|
||||
b = randn(out)
|
||||
x -> W * x .+ b
|
||||
end
|
||||
|
||||
linear1 = linear(5, 3) # we can access linear1.W etc
|
||||
linear2 = linear(3, 2)
|
||||
|
||||
model(x) = linear2(σ.(linear1(x)))
|
||||
|
||||
model(rand(5)) # => 2-element vector</code></pre><p>Another (equivalent) way is to create a struct that explicitly represents the affine layer.</p><pre><code class="language-julia">struct Affine
|
||||
W
|
||||
b
|
||||
end
|
||||
|
||||
Affine(in::Integer, out::Integer) =
|
||||
Affine(randn(out, in), randn(out))
|
||||
|
||||
# Overload call, so the object can be used as a function
|
||||
(m::Affine)(x) = m.W * x .+ m.b
|
||||
|
||||
a = Affine(10, 5)
|
||||
|
||||
a(rand(10)) # => 5-element vector</code></pre><p>Congratulations! You just built the <code>Dense</code> layer that comes with Flux. Flux has many interesting layers available, but they're all things you could have built yourself very easily.</p><p>(There is one small difference with <code>Dense</code> – for convenience it also takes an activation function, like <code>Dense(10, 5, σ)</code>.)</p><h2><a class="nav-anchor" id="Stacking-It-Up-1" href="#Stacking-It-Up-1">Stacking It Up</a></h2><p>It's pretty common to write models that look something like:</p><pre><code class="language-julia">layer1 = Dense(10, 5, σ)
|
||||
# ...
|
||||
model(x) = layer3(layer2(layer1(x)))</code></pre><p>For long chains, it might be a bit more intuitive to have a list of layers, like this:</p><pre><code class="language-julia">using Flux
|
||||
|
||||
layers = [Dense(10, 5, σ), Dense(5, 2), softmax]
|
||||
|
||||
model(x) = foldl((x, m) -> m(x), layers, init = x)
|
||||
|
||||
model(rand(10)) # => 2-element vector</code></pre><p>Handily, this is also provided for in Flux:</p><pre><code class="language-julia">model2 = Chain(
  Dense(10, 5, σ),
  Dense(5, 2),
  softmax)

model2(rand(10)) # => 2-element vector</code></pre><p>This quickly starts to look like a high-level deep learning library; yet you can see how it falls out of simple abstractions, and we lose none of the power of Julia code.</p><p>A nice property of this approach is that because "models" are just functions (possibly with trainable parameters), you can also see this as simple function composition.</p><pre><code class="language-julia">m = Dense(5, 2) ∘ Dense(10, 5, σ)

m(rand(10))</code></pre><p>Likewise, <code>Chain</code> will happily work with any Julia function.</p><pre><code class="language-julia">m = Chain(x -> x^2, x -> x+1)

m(5) # => 26</code></pre><h2><a class="nav-anchor" id="Layer-helpers-1" href="#Layer-helpers-1">Layer helpers</a></h2><p>Flux provides a set of helpers for custom layers, which you can enable by calling</p><pre><code class="language-julia">Flux.@functor Affine</code></pre><p>This enables a useful extra set of functionality for our <code>Affine</code> layer, such as <a href="../../training/optimisers/">collecting its parameters</a> or <a href="../../gpu/">moving it to the GPU</a>. For example:</p>
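<p>A minimal sketch of what that unlocks, reusing <code>a = Affine(10, 5)</code> from above (<code>gpu</code> simply returns the layer unchanged when no GPU support is available):</p><pre><code class="language-julia">params(a) # now collects both W and b
gpu(a)    # returns a copy of the layer with its arrays moved to the GPU</code></pre>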
<footer><hr/><a class="previous" href="../../"><span class="direction">Previous</span><span class="title">Home</span></a><a class="next" href="../recurrence/"><span class="direction">Next</span><span class="title">Recurrence</span></a></footer></article></body></html>
@ -0,0 +1,52 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Model Reference · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../basics/">Basics</a></li><li><a class="toctext" href="../recurrence/">Recurrence</a></li><li><a class="toctext" href="../regularisation/">Regularisation</a></li><li class="current"><a class="toctext" href>Model Reference</a><ul class="internal"><li><a class="toctext" href="#Basic-Layers-1">Basic Layers</a></li><li><a class="toctext" href="#Convolution-and-Pooling-Layers-1">Convolution and Pooling Layers</a></li><li><a class="toctext" href="#Recurrent-Layers-1">Recurrent Layers</a></li><li><a class="toctext" href="#Other-General-Purpose-Layers-1">Other General Purpose Layers</a></li><li><a class="toctext" href="#Activation-Functions-1">Activation Functions</a></li><li><a class="toctext" href="#Normalisation-and-Regularisation-1">Normalisation & Regularisation</a></li></ul></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href>Model Reference</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/layers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Model Reference</span><a class="fa fa-bars" href="#"></a></div></header><h2><a class="nav-anchor" id="Basic-Layers-1" href="#Basic-Layers-1">Basic Layers</a></h2><p>These core layers form the foundation of almost all neural networks.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Chain" href="#Flux.Chain"><code>Flux.Chain</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Chain(layers...)</code></pre><p>Chain multiple layers / functions together, so that they are called in 
sequence on a given input.</p><pre><code class="language-julia">m = Chain(x -> x^2, x -> x+1)
m(5) == 26

m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))</code></pre><p><code>Chain</code> also supports indexing and slicing, e.g. <code>m[2]</code> or <code>m[1:end-1]</code>. <code>m[1:3](x)</code> will calculate the output of the first three layers.</p>
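<p>A quick sketch of what slicing gives you, continuing from the two-layer <code>m</code> above (a sliced <code>Chain</code> is itself a callable <code>Chain</code>):</p><pre><code class="language-julia">m[1:1](x) == m[1](x) # => true</code></pre>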
</div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/basic.jl#L1-L18">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dense" href="#Flux.Dense"><code>Flux.Dense</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Dense(in::Integer, out::Integer, σ = identity)</code></pre><p>Creates a traditional <code>Dense</code> layer with parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-none">y = σ.(W * x .+ b)</code></pre><p>The input <code>x</code> must be a vector of length <code>in</code>, or a batch of vectors represented as an <code>in × N</code> matrix. The output <code>y</code> will be a vector or batch of length <code>out</code>.</p><pre><code class="language-julia">julia> d = Dense(5, 2)
Dense(5, 2)

julia> d(rand(5))
Tracked 2-element Array{Float64,1}:
 0.00257447
-0.00449443</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/basic.jl#L65-L84">source</a></section><h2><a class="nav-anchor" id="Convolution-and-Pooling-Layers-1" href="#Convolution-and-Pooling-Layers-1">Convolution and Pooling Layers</a></h2><p>These layers are used to build convolutional neural networks (CNNs).</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Conv" href="#Flux.Conv"><code>Flux.Conv</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Conv(size, in=>out)
Conv(size, in=>out, relu)</code></pre><p>Standard convolutional layer. <code>size</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Example: applying a <code>Conv</code> layer to a 1-channel input with a 2×2 window, giving a 16-channel output activated with ReLU.</p><pre><code class="language-none">size = (2,2)
in = 1
out = 16
Conv((2, 2), 1=>16, relu)</code></pre><p>Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L5-L25">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.MaxPool" href="#Flux.MaxPool"><code>Flux.MaxPool</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">MaxPool(k)</code></pre><p>Max pooling layer. <code>k</code> stands for the size of the window for each dimension of the input.</p><p>Takes the keyword arguments <code>pad</code> and <code>stride</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L278-L284">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.MeanPool" href="#Flux.MeanPool"><code>Flux.MeanPool</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">MeanPool(k)</code></pre><p>Mean pooling layer. <code>k</code> stands for the size of the window for each dimension of the input.</p><p>Takes the keyword arguments <code>pad</code> and <code>stride</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L307-L313">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.DepthwiseConv" href="#Flux.DepthwiseConv"><code>Flux.DepthwiseConv</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">DepthwiseConv(size, in=>out)
DepthwiseConv(size, in=>out, relu)</code></pre><p>Depthwise convolutional layer. <code>size</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively. Note that <code>out</code> must be an integer multiple of <code>in</code>.</p><p>Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L143-L155">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.ConvTranspose" href="#Flux.ConvTranspose"><code>Flux.ConvTranspose</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">ConvTranspose(size, in=>out)
ConvTranspose(size, in=>out, relu)</code></pre><p>Standard convolutional transpose layer. <code>size</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L71-L82">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.CrossCor" href="#Flux.CrossCor"><code>Flux.CrossCor</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">CrossCor(size, in=>out)
CrossCor(size, in=>out, relu)</code></pre><p>Standard cross-correlation layer. <code>size</code> should be a tuple like <code>(2, 2)</code>. <code>in</code> and <code>out</code> specify the number of input and output channels respectively.</p><p>Example: applying a <code>CrossCor</code> layer to a 1-channel input with a 2×2 window, giving a 16-channel output activated with ReLU.</p><pre><code class="language-none">size = (2,2)
in = 1
out = 16
CrossCor((2, 2), 1=>16, relu)</code></pre><p>Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a <code>100×100×3×1</code> array, and a batch of 50 would be a <code>100×100×3×50</code> array.</p><p>Takes the keyword arguments <code>pad</code>, <code>stride</code> and <code>dilation</code>.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/conv.jl#L207-L227">source</a></section><h2><a class="nav-anchor" id="Recurrent-Layers-1" href="#Recurrent-Layers-1">Recurrent Layers</a></h2><p>Much like the core layers above, but can be used to process sequence data (as well as other kinds of structured data).</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.RNN" href="#Flux.RNN"><code>Flux.RNN</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">RNN(in::Integer, out::Integer, σ = tanh)</code></pre><p>The most basic recurrent layer; essentially acts as a <code>Dense</code> layer, but with the output fed back into the input each time step.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/recurrent.jl#L91-L96">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.LSTM" href="#Flux.LSTM"><code>Flux.LSTM</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">LSTM(in::Integer, out::Integer)</code></pre><p>Long Short Term Memory recurrent layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/recurrent.jl#L136-L144">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.GRU" href="#Flux.GRU"><code>Flux.GRU</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">GRU(in::Integer, out::Integer)</code></pre><p>Gated Recurrent Unit layer. Behaves like an RNN but generally exhibits a longer memory span over sequences.</p><p>See <a href="https://colah.github.io/posts/2015-08-Understanding-LSTMs/">this article</a> for a good overview of the internals.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/recurrent.jl#L177-L185">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Recur" href="#Flux.Recur"><code>Flux.Recur</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Recur(cell)</code></pre><p><code>Recur</code> takes a recurrent cell and makes it stateful, managing the hidden state in the background. <code>cell</code> should be a model of the form:</p><pre><code class="language-none">h, y = cell(h, x...)</code></pre><p>For example, here's a recurrent network that keeps a running total of its inputs.</p><pre><code class="language-julia">accum(h, x) = (h+x, x)
rnn = Flux.Recur(accum, 0)
rnn(2) # 2
rnn(3) # 3
rnn.state # 5
rnn.(1:10) # apply to a sequence
rnn.state # 60</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/recurrent.jl#L7-L26">source</a></section><h2><a class="nav-anchor" id="Other-General-Purpose-Layers-1" href="#Other-General-Purpose-Layers-1">Other General Purpose Layers</a></h2><p>These are marginally more obscure than the basic layers above. In contrast to the layers described in the other sections, they are not readily grouped around a particular purpose (e.g. CNNs or RNNs).</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Maxout" href="#Flux.Maxout"><code>Flux.Maxout</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Maxout(over)</code></pre><p><code>Maxout</code> is a neural network layer which has a number of internal layers that all receive the same input. The layer returns the elementwise maximum of the internal layers' outputs.</p><p>Maxout over linear dense layers satisfies the universal approximation theorem.</p><p>Reference: Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron Courville, and Yoshua Bengio. 2013. Maxout networks. In Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28 (ICML'13), Sanjoy Dasgupta and David McAllester (Eds.), Vol. 28. JMLR.org III-1319-III-1327. https://arxiv.org/pdf/1302.4389.pdf</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/basic.jl#L149-L164">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.SkipConnection" href="#Flux.SkipConnection"><code>Flux.SkipConnection</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">SkipConnection(layers, connection)</code></pre><p>Creates a skip connection, consisting of a layer or <code>Chain</code> of consecutive layers plus a shortcut connection. The connection function will combine the result of the layers with the original input, to give the final output.</p><p>The simplest 'ResNet'-type connection is just <code>SkipConnection(layer, +)</code>, and requires the output of the layers to be the same shape as the input. Here is a more complicated example:</p><pre><code class="language-none">m = Conv((3,3), 4=>7, pad=(1,1))
x = ones(5,5,4,10);
size(m(x)) == (5, 5, 7, 10)

sm = SkipConnection(m, (mx, x) -> cat(mx, x, dims=3))
size(sm(x)) == (5, 5, 11, 10)</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/basic.jl#L196-L214">source</a></section><h2><a class="nav-anchor" id="Activation-Functions-1" href="#Activation-Functions-1">Activation Functions</a></h2><p>Non-linearities that go between layers of your model. Most of these functions are defined in <a href="https://github.com/FluxML/NNlib.jl">NNlib</a> but are available by default in Flux.</p><p>Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call <code>σ.(xs)</code>, <code>relu.(xs)</code> and so on.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.σ" href="#NNlib.σ"><code>NNlib.σ</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">σ(x) = 1 / (1 + exp(-x))</code></pre><p>Classic <a href="https://en.wikipedia.org/wiki/Sigmoid_function">sigmoid</a> activation function.</p></div></div></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.relu" href="#NNlib.relu"><code>NNlib.relu</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">relu(x) = max(0, x)</code></pre><p><a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function.</p></div></div></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.leakyrelu" href="#NNlib.leakyrelu"><code>NNlib.leakyrelu</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">leakyrelu(x) = max(0.01x, x)</code></pre><p>Leaky <a href="https://en.wikipedia.org/wiki/Rectifier_(neural_networks)">Rectified Linear Unit</a> activation function. You can also specify the coefficient explicitly, e.g. <code>leakyrelu(x, 0.01)</code>.</p></div></div></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.elu" href="#NNlib.elu"><code>NNlib.elu</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">elu(x, α = 1) =
  x > 0 ? x : α * (exp(x) - 1)</code></pre><p>Exponential Linear Unit activation function. See <a href="https://arxiv.org/abs/1511.07289">Fast and Accurate Deep Network Learning by Exponential Linear Units</a>. You can also specify the coefficient explicitly, e.g. <code>elu(x, 1)</code>.</p></div></div></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="NNlib.swish" href="#NNlib.swish"><code>NNlib.swish</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">swish(x) = x * σ(x)</code></pre><p>Self-gated activation function. See <a href="https://arxiv.org/pdf/1710.05941.pdf">Swish: a Self-Gated Activation Function</a>.</p></div></div></section><h2><a class="nav-anchor" id="Normalisation-and-Regularisation-1" href="#Normalisation-and-Regularisation-1">Normalisation & Regularisation</a></h2><p>These layers don't affect the structure of the network but may improve training times or reduce overfitting.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.BatchNorm" href="#Flux.BatchNorm"><code>Flux.BatchNorm</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">BatchNorm(channels::Integer, σ = identity;
          initβ = zeros, initγ = ones,
          ϵ = 1e-8, momentum = .1)</code></pre><p>Batch Normalization layer. The <code>channels</code> input should be the size of the channel dimension in your data (see below).</p><p>Given an array with <code>N</code> dimensions, call the <code>N-1</code>th the channel dimension. (For a batch of feature vectors this is just the data dimension, for <code>WHCN</code> images it's the usual channel dimension.)</p><p><code>BatchNorm</code> computes the mean and variance for each <code>W×H×1×N</code> slice and shifts them to have a new mean and variance (corresponding to the learnable, per-channel <code>bias</code> and <code>scale</code> parameters).</p><p>See <a href="https://arxiv.org/pdf/1502.03167.pdf">Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift</a>.</p><p>Example:</p><pre><code class="language-julia">m = Chain(
  Dense(28^2, 64),
  BatchNorm(64, relu),
  Dense(64, 10),
  BatchNorm(10),
  softmax)</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/normalise.jl#L93-L121">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Dropout" href="#Flux.Dropout"><code>Flux.Dropout</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Dropout(p, dims = :)</code></pre><p>A Dropout layer. For each input, either sets that input to <code>0</code> (with probability <code>p</code>) or scales it by <code>1/(1-p)</code>. The <code>dims</code> argument specifies the unbroadcasted dimensions, i.e. <code>dims=1</code> applies dropout along columns and <code>dims=2</code> along rows. This is used as a regularisation, i.e. it reduces overfitting during training. See also <a href="models/@ref"><code>dropout</code></a>.</p>
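<p>A quick sketch of the behaviour during training (a hypothetical run – which entries are zeroed is random, so the exact output will differ):</p><pre><code class="language-julia">d = Dropout(0.5)
d(ones(4)) # e.g. [2.0, 0.0, 0.0, 2.0] – surviving entries are scaled by 1/(1-p)</code></pre>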
</div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/normalise.jl#L18-L25">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.AlphaDropout" href="#Flux.AlphaDropout"><code>Flux.AlphaDropout</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">AlphaDropout(p)</code></pre><p>A dropout layer. It is used in Self-Normalizing Neural Networks (https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf). The AlphaDropout layer ensures that the mean and variance of the activations remain the same as before.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/normalise.jl#L44-L49">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.LayerNorm" href="#Flux.LayerNorm"><code>Flux.LayerNorm</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">LayerNorm(h::Integer)</code></pre><p>A <a href="https://arxiv.org/pdf/1607.06450.pdf">normalisation layer</a> designed to be used with recurrent hidden states of size <code>h</code>. Normalises the mean/stddev of each input before applying a per-neuron gain/bias.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/normalise.jl#L71-L77">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.GroupNorm" href="#Flux.GroupNorm"><code>Flux.GroupNorm</code></a> — <span class="docstring-category">Type</span>.</div><div><div><p>Group Normalization. This layer can outperform Batch-Normalization and Instance-Normalization.</p><pre><code class="language-none">GroupNorm(chs::Integer, G::Integer, λ = identity;
          initβ = (i) -> zeros(Float32, i), initγ = (i) -> ones(Float32, i),
          ϵ = 1f-5, momentum = 0.1f0)</code></pre><p><span>$chs$</span> is the number of channels, the channel dimension of your input. For an array of N dimensions, the (N-1)th index is the channel dimension.</p><p><span>$G$</span> is the number of groups along which the statistics are computed. The number of channels must be an integer multiple of the number of groups.</p><p>Example:</p><pre><code class="language-none">m = Chain(Conv((3,3), 1=>32, leakyrelu; pad = 1),
          GroupNorm(32, 16)) # 32 channels, 16 groups (G = 16), thus 2 channels per group</code></pre><p>Link: https://arxiv.org/pdf/1803.08494.pdf</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/layers/normalise.jl#L272-L293">source</a></section><footer><hr/><a class="previous" href="../regularisation/"><span class="direction">Previous</span><span class="title">Regularisation</span></a><a class="next" href="../../training/optimisers/"><span class="direction">Next</span><span class="title">Optimisers</span></a></footer></article></body></html>
@ -0,0 +1,42 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Recurrence · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../basics/">Basics</a></li><li class="current"><a class="toctext" href>Recurrence</a><ul class="internal"><li><a class="toctext" href="#Recurrent-Cells-1">Recurrent Cells</a></li><li><a class="toctext" href="#Stateful-Models-1">Stateful Models</a></li><li><a class="toctext" href="#Sequences-1">Sequences</a></li></ul></li><li><a class="toctext" href="../regularisation/">Regularisation</a></li><li><a class="toctext" href="../layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href>Recurrence</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/recurrence.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Recurrence</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Recurrent-Models-1" href="#Recurrent-Models-1">Recurrent Models</a></h1><h2><a class="nav-anchor" id="Recurrent-Cells-1" href="#Recurrent-Cells-1">Recurrent Cells</a></h2><p>In the simple feedforward case, our model <code>m</code> is a simple function from various inputs <code>xᵢ</code> to predictions <code>yᵢ</code>. (For example, each <code>x</code> might be an MNIST digit and each <code>y</code> a digit label.) Each prediction is completely independent of any others, and using the same <code>x</code> will always produce the same <code>y</code>.</p><pre><code class="language-julia">y₁ = f(x₁)
y₂ = f(x₂)
y₃ = f(x₃)
# ...</code></pre><p>Recurrent networks introduce a <em>hidden state</em> that gets carried over each time we run the model. The model now takes the old <code>h</code> as an input, and produces a new <code>h</code> as output, each time we run it.</p><pre><code class="language-julia">h = # ... initial state ...
h, y₁ = f(h, x₁)
h, y₂ = f(h, x₂)
h, y₃ = f(h, x₃)
# ...</code></pre><p>Information stored in <code>h</code> is preserved for the next prediction, allowing it to function as a kind of memory. This also means that the prediction made for a given <code>x</code> depends on all the inputs previously fed into the model.</p><p>(This might be important if, for example, each <code>x</code> represents one word of a sentence; the model's interpretation of the word "bank" should change if the previous input was "river" rather than "investment".)</p><p>Flux's RNN support closely follows this mathematical perspective. The most basic RNN is as close as possible to a standard <code>Dense</code> layer, and the output is also the hidden state.</p><pre><code class="language-julia">Wxh = randn(5, 10)
Whh = randn(5, 5)
b = randn(5)

function rnn(h, x)
  h = tanh.(Wxh * x .+ Whh * h .+ b)
  return h, h
end

x = rand(10) # dummy data
h = rand(5)  # initial hidden state

h, y = rnn(h, x)</code></pre><p>If you run the last line a few times, you'll notice the output <code>y</code> changing slightly even though the input <code>x</code> is the same.</p>
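<p>For instance, we can feed a short sequence through the cell by hand, threading the state along (a sketch; the numbers involved are random):</p><pre><code class="language-julia">xs = [rand(10) for i = 1:3]
for x in xs
  global h
  h, y = rnn(h, x) # h now carries information from the earlier inputs
end</code></pre>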
<p>We sometimes refer to functions like <code>rnn</code> above, which explicitly manage state, as recurrent <em>cells</em>. There are various recurrent cells available, which are documented in the <a href="../layers/">layer reference</a>. The hand-written example above can be replaced with:</p><pre><code class="language-julia">using Flux

rnn2 = Flux.RNNCell(10, 5)

x = rand(10) # dummy data
h = rand(5)  # initial hidden state

h, y = rnn2(h, x)</code></pre><h2><a class="nav-anchor" id="Stateful-Models-1" href="#Stateful-Models-1">Stateful Models</a></h2><p>For the most part, we don't want to manage hidden states ourselves, but to treat our models as being stateful. Flux provides the <code>Recur</code> wrapper to do this.</p><pre><code class="language-julia">x = rand(10)
h = rand(5)

m = Flux.Recur(rnn, h)

y = m(x)</code></pre><p>The <code>Recur</code> wrapper stores the state between runs in the <code>m.state</code> field.</p>
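<p>A small sketch of that bookkeeping (continuing from above; since our <code>rnn</code> cell returns its new state as its output, the stored state is just the last output):</p><pre><code class="language-julia">m.state == y # => true, the new state was kept after the call above
y2 = m(x)    # starts from the stored state, so y2 differs from y</code></pre>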
<p>If you use the <code>RNN(10, 5)</code> constructor – as opposed to <code>RNNCell</code> – you'll see that it's simply a wrapped cell.</p><pre><code class="language-julia">julia> RNN(10, 5)
Recur(RNNCell(10, 5, tanh))</code></pre><h2><a class="nav-anchor" id="Sequences-1" href="#Sequences-1">Sequences</a></h2><p>Often we want to work with sequences of inputs, rather than individual <code>x</code>s.</p><pre><code class="language-julia">seq = [rand(10) for i = 1:10]</code></pre><p>With <code>Recur</code>, applying our model to each element of a sequence is trivial:</p><pre><code class="language-julia">m.(seq) # returns a list of 5-element vectors</code></pre><p>This works even when we've chained recurrent layers into a larger model.</p><pre><code class="language-julia">m = Chain(LSTM(10, 15), Dense(15, 5))
m.(seq)</code></pre><p>Finally, we can reset the hidden state of the cell back to its initial value using <code>reset!(m)</code>.</p>
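<p>A one-line sketch (this also works when the recurrent layers are wrapped in a <code>Chain</code>, as here):</p><pre><code class="language-julia">Flux.reset!(m) # the hidden state is the initial value again; the next call starts fresh</code></pre>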
<footer><hr/><a class="previous" href="../basics/"><span class="direction">Previous</span><span class="title">Basics</span></a><a class="next" href="../regularisation/"><span class="direction">Next</span><span class="title">Regularisation</span></a></footer></article></body></html>
@ -0,0 +1,39 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Regularisation · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../basics/">Basics</a></li><li><a class="toctext" href="../recurrence/">Recurrence</a></li><li class="current"><a class="toctext" href>Regularisation</a><ul class="internal"></ul></li><li><a class="toctext" href="../layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Building Models</li><li><a href>Regularisation</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/models/regularisation.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Regularisation</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Regularisation-1" href="#Regularisation-1">Regularisation</a></h1><p>Applying regularisation to model parameters is straightforward. We just need to apply an appropriate regulariser, such as <code>norm</code>, to each model parameter and add the result to the overall loss.</p><p>For example, say we have a simple regression.</p><pre><code class="language-julia">using Flux: crossentropy
m = Dense(10, 5)
loss(x, y) = crossentropy(softmax(m(x)), y)</code></pre><p>We can regularise this by taking the (L2) norm of the parameters, <code>m.W</code> and <code>m.b</code>.</p><pre><code class="language-julia">using LinearAlgebra

penalty() = norm(m.W) + norm(m.b)
loss(x, y) = crossentropy(softmax(m(x)), y) + penalty()</code></pre><p>When working with layers, Flux provides the <code>params</code> function to grab all parameters at once. We can easily penalise everything with <code>sum(norm, params(m))</code>.</p><pre><code class="language-julia">julia> params(m)
2-element Array{Any,1}:
 param([0.355408 0.533092; … 0.430459 0.171498])
 param([0.0, 0.0, 0.0, 0.0, 0.0])

julia> sum(norm, params(m))
26.01749952921026 (tracked)</code></pre><p>Here's a larger example with a multi-layer perceptron.</p><pre><code class="language-julia">m = Chain(
  Dense(28^2, 128, relu),
  Dense(128, 32, relu),
  Dense(32, 10), softmax)

loss(x, y) = crossentropy(m(x), y) + sum(norm, params(m))

loss(rand(28^2), rand(10))</code></pre><p>One can also easily add per-layer regularisation via the <code>activations</code> function:</p><pre><code class="language-julia">julia> using Flux: activations

julia> c = Chain(Dense(10,5,σ),Dense(5,2),softmax)
Chain(Dense(10, 5, σ), Dense(5, 2), softmax)

julia> activations(c, rand(10))
3-element Array{Any,1}:
 Float32[0.84682214, 0.6704139, 0.42177814, 0.257832, 0.36255655]
 Float32[0.1501253, 0.073269576]
 Float32[0.5192045, 0.48079553]

julia> sum(norm, ans)
2.1166067f0</code></pre><footer><hr/><a class="previous" href="../recurrence/"><span class="direction">Previous</span><span class="title">Recurrence</span></a><a class="next" href="../layers/"><span class="direction">Next</span><span class="title">Model Reference</span></a></footer></article></body></html>
File diff suppressed because one or more lines are too long
@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Saving & Loading · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics/">Basics</a></li><li><a class="toctext" href="../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu/">GPU Support</a></li><li class="current"><a class="toctext" href>Saving & Loading</a><ul class="internal"><li><a class="toctext" href="#Saving-Model-Weights-1">Saving Model Weights</a></li><li><a class="toctext" href="#Checkpointing-1">Checkpointing</a></li></ul></li><li><a class="toctext" href="../performance/">Performance Tips</a></li><li><a class="toctext" href="../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li><a href>Saving & Loading</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/saving.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Saving & Loading</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Saving-and-Loading-Models-1" href="#Saving-and-Loading-Models-1">Saving and Loading Models</a></h1><p>You may wish to save models so that they can be loaded and run in a later session. The easiest way to do this is via <a href="https://github.com/MikeInnes/BSON.jl">BSON.jl</a>.</p><p>Save a model:</p><pre><code class="language-julia">julia> using Flux
julia> model = Chain(Dense(10,5,relu),Dense(5,2),softmax)
Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)

julia> using BSON: @save

julia> @save "mymodel.bson" model</code></pre><p>Load it again:</p><pre><code class="language-julia">julia> using Flux
julia> using BSON: @load

julia> @load "mymodel.bson" model

julia> model
Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)</code></pre><p>Models are just normal Julia structs, so it's fine to use any Julia storage format for this purpose. BSON.jl is particularly well supported and most likely to be forwards compatible (that is, models saved now will load in future versions of Flux).</p><div class="admonition note"><div class="admonition-title">Note</div><div class="admonition-text"><p>If a saved model's weights are stored on the GPU, the model will not load later on if there is no GPU support available. It's best to <a href="../gpu/">move your model to the CPU</a> with <code>cpu(model)</code> before saving it.</p></div></div>
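<p>Concretely, that can be as simple as the following sketch (<code>cpu</code> is a no-op when the weights already live on the CPU):</p><pre><code class="language-julia">julia> model = cpu(model) # move the weights off the GPU before saving
Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)

julia> @save "mymodel.bson" model</code></pre>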
<h2><a class="nav-anchor" id="Saving-Model-Weights-1" href="#Saving-Model-Weights-1">Saving Model Weights</a></h2><p>In some cases it may be useful to save only the model parameters themselves, and rebuild the model architecture in your code. You can use <code>params(model)</code> to get model parameters. You can also use <code>data.(params(model))</code> to remove tracking.</p><pre><code class="language-julia">julia> using Flux
julia> model = Chain(Dense(10,5,relu),Dense(5,2),softmax)
Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)

julia> weights = params(model);

julia> using BSON: @save

julia> @save "mymodel.bson" weights</code></pre><p>You can easily load parameters back into a model with <code>Flux.loadparams!</code>.</p><pre><code class="language-julia">julia> using Flux
julia> model = Chain(Dense(10,5,relu),Dense(5,2),softmax)
Chain(Dense(10, 5, NNlib.relu), Dense(5, 2), NNlib.softmax)

julia> using BSON: @load

julia> @load "mymodel.bson" weights

julia> Flux.loadparams!(model, weights)</code></pre><p>The new <code>model</code> we created will now be identical to the one we saved parameters for.</p><h2><a class="nav-anchor" id="Checkpointing-1" href="#Checkpointing-1">Checkpointing</a></h2><p>In longer training runs it's a good idea to periodically save your model, so that you can resume if training is interrupted (for example, if there's a power cut). You can do this by saving the model in the <a href="../training/training/">callback provided to <code>train!</code></a>.</p><pre><code class="language-julia">using Flux: throttle
using BSON: @save

model = Chain(Dense(10,5,relu),Dense(5,2),softmax)

evalcb = throttle(30) do
  # Show loss
  @save "model-checkpoint.bson" model
end</code></pre><p>This will update the <code>"model-checkpoint.bson"</code> file every thirty seconds.</p><p>You can get more advanced by saving a series of models throughout training, for example</p><pre><code class="language-julia">@save "model-$(now()).bson" model</code></pre><p>will produce a series of models like <code>"model-2018-03-06T02:57:10.41.bson"</code>. You could also store the current test set loss, so that it's easy to (for example) revert to an older copy of the model if it starts to overfit.</p><pre><code class="language-julia">@save "model-$(now()).bson" model loss = testloss()</code></pre><p>You can even store optimiser state alongside the model, to resume training exactly where you left off.</p><pre><code class="language-julia">opt = ADAM()
@save "model-$(now()).bson" model opt</code></pre>
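<p>Loading such a checkpoint to resume training is then a matter of the following sketch (using the example filename from above; note that <code>now()</code> comes from the standard <code>Dates</code> library):</p><pre><code class="language-julia">using BSON: @load

@load "model-2018-03-06T02:57:10.41.bson" model opt # restores both the model and the optimiser state</code></pre>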
@save "model-$(now()).bson" model opt</code></pre><footer><hr/><a class="previous" href="../gpu/"><span class="direction">Previous</span><span class="title">GPU Support</span></a><a class="next" href="../performance/"><span class="direction">Next</span><span class="title">Performance Tips</span></a></footer></article></body></html>
@ -0,0 +1,9 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Search · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL=".."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../assets/documenter.js"></script><script src="../siteinfo.js"></script><script src="../../versions.js"></script><link href="../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../models/basics/">Basics</a></li><li><a class="toctext" href="../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../training/optimisers/">Optimisers</a></li><li><a class="toctext" href="../training/training/">Training</a></li></ul></li><li><a class="toctext" href="../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../gpu/">GPU Support</a></li><li><a class="toctext" href="../saving/">Saving & Loading</a></li><li><a class="toctext" href="../performance/">Performance Tips</a></li><li><a class="toctext" href="../community/">Community</a></li></ul></nav><article><header><nav><ul><li>Search</li></ul></nav><hr/><div id="topbar"><span>Search</span><a class="fa fa-bars" href="#"></a></div></header><h1>Search</h1><p id="search-info">Number of results: <span id="search-results-number">loading...</span></p><ul id="search-results"></ul></article></body><script src="../search_index.js"></script><script src="../assets/search.js"></script></html>
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
var DOCUMENTER_CURRENT_VERSION = "v0.10.0";
@ -0,0 +1,84 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Optimisers · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../../models/basics/">Basics</a></li><li><a class="toctext" href="../../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li class="current"><a class="toctext" href>Optimisers</a><ul class="internal"><li><a class="toctext" href="#Optimiser-Reference-1">Optimiser Reference</a></li><li><a class="toctext" href="#Optimiser-Interface-1">Optimiser Interface</a></li><li><a class="toctext" href="#Composing-Optimisers-1">Composing Optimisers</a></li><li><a class="toctext" href="#Decays-1">Decays</a></li></ul></li><li><a class="toctext" href="../training/">Training</a></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href>Optimisers</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/optimisers.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Optimisers</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Optimisers-1" href="#Optimisers-1">Optimisers</a></h1><p>Consider a <a href="../../models/basics/">simple linear regression</a>. We create some dummy data, calculate a loss, and backpropagate to calculate gradients for the parameters <code>W</code> and <code>b</code>.</p><pre><code class="language-julia">using Flux
W = rand(2, 5)
b = rand(2)

predict(x) = (W * x) .+ b
loss(x, y) = sum((predict(x) .- y).^2)

x, y = rand(5), rand(2) # Dummy data
l = loss(x, y) # ~ 3

θ = Params([W, b])
grads = gradient(() -> loss(x, y), θ)</code></pre><p>We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:</p><pre><code class="language-julia">using Flux: update!
η = 0.1 # Learning Rate
for p in (W, b)
  update!(p, -η * grads[p])
end</code></pre><p>Running this will alter the parameters <code>W</code> and <code>b</code>, and our loss should go down. Flux provides a more general way to do optimiser updates like this.</p><pre><code class="language-julia">opt = Descent(0.1) # Gradient descent with learning rate 0.1
for p in (W, b)
  update!(opt, p, grads[p])
end</code></pre><p>An optimiser <code>update!</code> accepts a parameter and a gradient, and updates the parameter according to the chosen rule. We can also pass <code>opt</code> to our <a href="../training/">training loop</a>, which will update all parameters of the model in a loop. However, we can now easily replace <code>Descent</code> with a more advanced optimiser such as <code>ADAM</code>.</p>
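<p>Swapping the rule in is a one-line change, as a sketch (<code>ADAM</code> keeps its moment estimates internally, so the same loop works unchanged):</p><pre><code class="language-julia">opt = ADAM(0.001)

for p in (W, b)
  update!(opt, p, grads[p])
end</code></pre>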
end</code></pre><p>An optimiser <code>update!</code> accepts a parameter and a gradient, and updates the parameter according to the chosen rule. We can also pass <code>opt</code> to our <a href="../training/">training loop</a>, which will update all parameters of the model in a loop. However, we can now easily replace <code>Descent</code> with a more advanced optimiser such as <code>ADAM</code>.</p><h2><a class="nav-anchor" id="Optimiser-Reference-1" href="#Optimiser-Reference-1">Optimiser Reference</a></h2><p>All optimisers return an object that, when passed to <code>train!</code>, will update the parameters passed to it.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Descent" href="#Flux.Optimise.Descent"><code>Flux.Optimise.Descent</code></a> — <span class="docstring-category">Type</span>.</div><div><div><p>Descent(η)</p><p>Classic gradient descent optimiser with learning rate <code>η</code>. For each parameter <code>p</code> and its gradient <code>δp</code>, this runs <code>p -= η*δp</code></p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): The amount by which the gradients are discounted before updating the weights. Defaults to <code>0.1</code>.</li></ul><p><strong>Example</strong></p><pre><code class="language-julia-repl">opt = Descent() # uses default η (0.1)

opt = Descent(0.3) # use provided η

ps = params(model)

gs = gradient(ps) do
  loss(x, y)
end

Flux.Optimise.update!(opt, ps, gs)</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L9-L32">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Momentum" href="#Flux.Optimise.Momentum"><code>Flux.Optimise.Momentum</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Momentum(η, ρ)</code></pre><p>Gradient descent with learning rate <code>η</code> and momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (<code>η</code>): Amount by which gradients are discounted before updating the weights. Defaults to <code>0.01</code>.</li><li>Momentum (<code>ρ</code>): Parameter that accelerates descent in the relevant direction and dampens oscillations. Defaults to <code>0.9</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Momentum() # uses defaults of η = 0.01 and ρ = 0.9
opt = Momentum(0.01, 0.99)</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L43-L58">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.Nesterov" href="#Flux.Optimise.Nesterov"><code>Flux.Optimise.Nesterov</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">Nesterov(η, ρ)</code></pre><p>Gradient descent with learning rate <code>η</code> and Nesterov momentum <code>ρ</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Amount by which the gradients are discounted before updating the weights. Defaults to <code>0.001</code>.</li><li>Nesterov Momentum (ρ): Parameter controlling the amount of Nesterov momentum to be applied. Defaults to <code>0.9</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = Nesterov() # uses defaults η = 0.001 and ρ = 0.9
opt = Nesterov(0.003, 0.95)</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L74-L89">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.RMSProp" href="#Flux.Optimise.RMSProp"><code>Flux.Optimise.RMSProp</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">RMSProp(η, ρ)</code></pre><p>Implements the RMSProp algorithm. Often a good choice for recurrent networks. Parameters other than the learning rate generally don't need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.001</code>.</li><li>Rho (ρ): Defaults to <code>0.9</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = RMSProp() # uses default η = 0.001 and ρ = 0.9
opt = RMSProp(0.002, 0.95)</code></pre><p><strong>References</strong></p><p><a href="https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf">RMSProp</a></p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L106-L124">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAM" href="#Flux.Optimise.ADAM"><code>Flux.Optimise.ADAM</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">ADAM(η, β::Tuple)</code></pre><p>Implements the ADAM optimiser.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (<code>η</code>): Defaults to <code>0.001</code>.</li><li>Beta (<code>β::Tuple</code>): The first element refers to β1 and the second to β2. Defaults to <code>(0.9, 0.999)</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAM() # uses the default η = 0.001 and β = (0.9, 0.999)
opt = ADAM(0.001, (0.9, 0.8))</code></pre><p><strong>References</strong></p><p><a href="https://arxiv.org/abs/1412.6980v8">ADAM</a> optimiser.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L140-L158">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.AdaMax" href="#Flux.Optimise.AdaMax"><code>Flux.Optimise.AdaMax</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">AdaMax(η, β::Tuple)</code></pre><p>Variant of ADAM based on the ∞-norm.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.001</code>.</li><li>Beta (β::Tuple): The first element refers to β1 and the second to β2. Defaults to <code>(0.9, 0.999)</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AdaMax() # uses default η and β
opt = AdaMax(0.001, (0.9, 0.995))</code></pre><p><strong>References</strong></p><p><a href="https://arxiv.org/abs/1412.6980v9">AdaMax</a> optimiser.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L222-L239">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAGrad" href="#Flux.Optimise.ADAGrad"><code>Flux.Optimise.ADAGrad</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">ADAGrad(η)</code></pre><p>Implements AdaGrad. It has parameter-specific learning rates based on how frequently each parameter is updated.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.1</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAGrad() # uses default η = 0.1
opt = ADAGrad(0.001)</code></pre><p><strong>References</strong></p><p><a href="http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf">ADAGrad</a> optimiser. Parameters don't need tuning.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L258-L276">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADADelta" href="#Flux.Optimise.ADADelta"><code>Flux.Optimise.ADADelta</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">ADADelta(ρ)</code></pre><p>Version of ADAGrad that adapts its learning rate based on a window of past gradient updates. Parameters don't need tuning.</p><p><strong>Parameters</strong></p><ul><li>Rho (ρ): Factor by which the gradient is decayed at each time step. Defaults to <code>0.9</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADADelta() # uses default ρ = 0.9
opt = ADADelta(0.89)</code></pre><p><strong>References</strong></p><p><a href="https://arxiv.org/abs/1212.5701">ADADelta</a> optimiser.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L291-L307">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.AMSGrad" href="#Flux.Optimise.AMSGrad"><code>Flux.Optimise.AMSGrad</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">AMSGrad(η, β::Tuple)</code></pre><p>Implements the AMSGrad version of the ADAM optimiser. Parameters don't need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.001</code>.</li><li>Beta (β::Tuple): The first element refers to β1 and the second to β2. Defaults to <code>(0.9, 0.999)</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = AMSGrad() # uses default η and β
opt = AMSGrad(0.001, (0.89, 0.995))</code></pre><p><strong>References</strong></p><p><a href="https://openreview.net/forum?id=ryQu7f-RZ">AMSGrad</a> optimiser.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L324-L341">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.NADAM" href="#Flux.Optimise.NADAM"><code>Flux.Optimise.NADAM</code></a> — <span class="docstring-category">Type</span>.</div><div><div><pre><code class="language-julia">NADAM(η, β::Tuple)</code></pre><p>Nesterov variant of ADAM. Parameters don't need tuning.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.001</code>.</li><li>Beta (β::Tuple): The first element refers to β1 and the second to β2. Defaults to <code>(0.9, 0.999)</code>.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = NADAM() # uses default η and β
opt = NADAM(0.002, (0.89, 0.995))</code></pre><p><strong>References</strong></p><p><a href="http://cs229.stanford.edu/proj2015/054_report.pdf">NADAM</a> optimiser.</p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L359-L376">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ADAMW" href="#Flux.Optimise.ADAMW"><code>Flux.Optimise.ADAMW</code></a> — <span class="docstring-category">Function</span>.</div><div><div><pre><code class="language-julia">ADAMW(η, β::Tuple, decay)</code></pre><p>Variant of ADAM defined by fixing weight decay regularization.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (η): Defaults to <code>0.001</code>.</li><li>Beta (β::Tuple): The first element refers to β1 and the second to β2. Defaults to (0.9, 0.999).</li><li>decay: Decay applied to weights during optimisation. Defaults to 0.</li></ul><p><strong>Examples</strong></p><pre><code class="language-julia">opt = ADAMW() # uses default η, β and decay
opt = ADAMW(0.001, (0.89, 0.995), 0.1)</code></pre><p><strong>References</strong></p><p><a href="https://arxiv.org/abs/1711.05101">ADAMW</a></p></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L395-L413">source</a></section><h2><a class="nav-anchor" id="Optimiser-Interface-1" href="#Optimiser-Interface-1">Optimiser Interface</a></h2><p>Flux's optimisers are built around a <code>struct</code> that holds all the optimiser parameters along with a definition of how to apply the update rule associated with it. We do this via the <code>apply!</code> function, which takes the optimiser as the first argument, followed by the parameter and its corresponding gradient.</p><p>In this manner Flux also allows one to create custom optimisers to be used seamlessly. Let's work through this with a simple example.</p><pre><code class="language-julia">mutable struct Momentum
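  # eta: learning rate; rho: momentum coefficient
  # velocity: per-parameter state, an IdDict keyed by each parameter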
  eta
  rho
  velocity
end

Momentum(eta::Real, rho::Real) = Momentum(eta, rho, IdDict())</code></pre><p>The <code>Momentum</code> type will act as our optimiser in this case. Notice that we have added all the parameters as fields, along with the velocity which we will use as our state dictionary. Each parameter in our models will get an entry in there. We can now define the rule applied when this optimiser is invoked.</p><pre><code class="language-julia">function apply!(o::Momentum, x, Δ)
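  # x is the parameter being updated and Δ its current gradient;
  # apply! rewrites Δ in place into the actual step to take for x.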
  η, ρ = o.eta, o.rho
  v = get!(o.velocity, x, zero(x))::typeof(x)
  @. v = ρ * v - η * Δ
  @. Δ = -v
end</code></pre><p>This is the basic definition of a Momentum update rule given by:</p><div>\[v = ρ * v - η * Δ
w = w - v\]</div><p>The <code>apply!</code> function defines the update rule for an optimiser <code>opt</code>, given the parameters and gradients. It returns the updated gradients. Here, the velocity <code>v</code> for each parameter <code>x</code> is looked up in the optimiser's state, and that state is updated as the rule is applied.</p><p>Flux calls this function internally via <code>update!</code>, which shares its API with <code>apply!</code> but ensures that multiple parameters are handled gracefully.</p><h2><a class="nav-anchor" id="Composing-Optimisers-1" href="#Composing-Optimisers-1">Composing Optimisers</a></h2><p>Flux defines a special kind of optimiser called simply <code>Optimiser</code>, which takes arbitrary optimisers as input. Its behaviour is similar to the usual optimisers, but differs in that it acts by calling the optimisers listed in it sequentially. Each optimiser produces a modified gradient that will be fed into the next, and the resultant update will be applied to the parameter as usual. A classic use case is where adding decays is desirable. Flux defines some basic decays including <code>ExpDecay</code>, <code>InvDecay</code> etc.</p><pre><code class="language-julia">opt = Optimiser(ExpDecay(0.001, 0.1, 1000, 1e-4), Descent())</code></pre><p>Here we apply exponential decay to the <code>Descent</code> optimiser. The defaults of <code>ExpDecay</code> mean that its learning rate will be decayed every 1000 steps. It is then applied like any other optimiser.</p><pre><code class="language-julia">w = randn(10, 10)
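# Two random 10×10 maps; the loss measures how differently they act on
# the same input, so it falls as training brings them into agreement.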
w1 = randn(10,10)
ps = Params([w, w1])

loss(x) = Flux.mse(w * x, w1 * x)

loss(rand(10)) # around 9

for t = 1:10^5
  θ = Params([w, w1])
  θ̄ = gradient(() -> loss(rand(10)), θ)
  Flux.Optimise.update!(opt, θ, θ̄)
end

loss(rand(10)) # around 0.9</code></pre><p>In this manner it is possible to compose optimisers for some added flexibility.</p><h2><a class="nav-anchor" id="Decays-1" href="#Decays-1">Decays</a></h2><p>Similar to optimisers, Flux also defines some simple decays that can be used in conjunction with other optimisers, or standalone.</p><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.ExpDecay" href="#Flux.Optimise.ExpDecay"><code>Flux.Optimise.ExpDecay</code></a> — <span class="docstring-category">Type</span>.</div><div><div><p>ExpDecay(eta, decay, decay_step, clip)</p><p>Discount the learning rate <code>eta</code> by a factor of <code>decay</code> every <code>decay_step</code> steps, until a minimum of <code>clip</code>.</p><p><strong>Parameters</strong></p><ul><li>Learning Rate (eta): Defaults to <code>0.001</code>.</li><li>decay: Factor by which the learning rate is discounted. Defaults to <code>0.1</code>.</li><li>decay_step: Number of steps between two decay operations. Defaults to <code>1000</code>.</li><li>clip: Minimum value of the learning rate. Defaults to <code>1e-4</code>.</li></ul><p><strong>Example</strong></p><p>To apply exponential decay to an optimiser:</p><pre><code class="language-julia"> Optimiser(ExpDecay(..), Opt(..))
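
# With no arguments, ExpDecay uses its defaults:
# η = 0.001, decayed by 0.1 every 1000 steps, clipped at 1e-4.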
opt = Optimiser(ExpDecay(), ADAM())</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L472-L490">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.InvDecay" href="#Flux.Optimise.InvDecay"><code>Flux.Optimise.InvDecay</code></a> — <span class="docstring-category">Type</span>.</div><div><div><p>InvDecay(γ)</p><p>Applies inverse time decay to an optimiser.</p><p><strong>Parameters</strong></p><ul><li>gamma (γ): Defaults to <code>0.001</code>.</li></ul><p><strong>Example</strong></p><pre><code class="language-julia"> Optimiser(InvDecay(..), Opt(..))</code></pre></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L444-L456">source</a></section><section class="docstring"><div class="docstring-header"><a class="docstring-binding" id="Flux.Optimise.WeightDecay" href="#Flux.Optimise.WeightDecay"><code>Flux.Optimise.WeightDecay</code></a> — <span class="docstring-category">Type</span>.</div><div><div><p>WeightDecay(wd)</p><p>Decays the weight by <code>wd</code>.</p><p><strong>Parameters</strong></p><ul><li>weight decay (wd): Defaults to <code>0</code>.</li></ul></div></div><a class="source-link" target="_blank" href="https://github.com/FluxML/Flux.jl/blob/f46b5243dbc762081ccfbe991690750b307e2fc2/src/optimise/optimisers.jl#L511-L518">source</a></section><footer><hr/><a class="previous" href="../../models/layers/"><span class="direction">Previous</span><span class="title">Model Reference</span></a><a class="next" href="../training/"><span class="direction">Next</span><span class="title">Training</span></a></footer></article></body></html>
@ -0,0 +1,38 @@
<!DOCTYPE html>
<html lang="en"><head><meta charset="UTF-8"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><title>Training · Flux</title><script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

ga('create', 'UA-36890222-9', 'auto');
ga('send', 'pageview');
</script><link href="https://cdnjs.cloudflare.com/ajax/libs/normalize/4.2.0/normalize.min.css" rel="stylesheet" type="text/css"/><link href="https://fonts.googleapis.com/css?family=Lato|Roboto+Mono" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css" rel="stylesheet" type="text/css"/><link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css" rel="stylesheet" type="text/css"/><script>documenterBaseURL="../.."</script><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.2.0/require.min.js" data-main="../../assets/documenter.js"></script><script src="../../siteinfo.js"></script><script src="../../../versions.js"></script><link href="../../assets/documenter.css" rel="stylesheet" type="text/css"/><link href="../../assets/flux.css" rel="stylesheet" type="text/css"/></head><body><nav class="toc"><h1>Flux</h1><select id="version-selector" onChange="window.location.href=this.value" style="visibility: hidden"></select><form class="search" id="search-form" action="../../search/"><input id="search-query" name="q" type="text" placeholder="Search docs"/></form><ul><li><a class="toctext" href="../../">Home</a></li><li><span class="toctext">Building Models</span><ul><li><a class="toctext" href="../../models/basics/">Basics</a></li><li><a class="toctext" href="../../models/recurrence/">Recurrence</a></li><li><a class="toctext" href="../../models/regularisation/">Regularisation</a></li><li><a class="toctext" href="../../models/layers/">Model Reference</a></li></ul></li><li><span class="toctext">Training Models</span><ul><li><a class="toctext" href="../optimisers/">Optimisers</a></li><li class="current"><a class="toctext" href>Training</a><ul class="internal"><li><a class="toctext" href="#Loss-Functions-1">Loss Functions</a></li><li><a class="toctext" href="#Model-parameters-1">Model parameters</a></li><li><a class="toctext" href="#Datasets-1">Datasets</a></li><li><a class="toctext" href="#Callbacks-1">Callbacks</a></li></ul></li></ul></li><li><a class="toctext" href="../../data/onehot/">One-Hot Encoding</a></li><li><a class="toctext" href="../../gpu/">GPU Support</a></li><li><a class="toctext" href="../../saving/">Saving & Loading</a></li><li><a class="toctext" href="../../performance/">Performance Tips</a></li><li><a class="toctext" href="../../community/">Community</a></li></ul></nav><article id="docs"><header><nav><ul><li>Training Models</li><li><a href>Training</a></li></ul><a class="edit-page" href="https://github.com/FluxML/Flux.jl/blob/master/docs/src/training/training.md"><span class="fa"></span> Edit on GitHub</a></nav><hr/><div id="topbar"><span>Training</span><a class="fa fa-bars" href="#"></a></div></header><h1><a class="nav-anchor" id="Training-1" href="#Training-1">Training</a></h1><p>To actually train a model we need four things:</p><ul><li>An <em>objective function</em> that evaluates how well a model is doing given some input data.</li><li>The trainable parameters of the model.</li><li>A collection of data points that will be provided to the objective function.</li><li>An <a href="../optimisers/">optimiser</a> that will update the model parameters appropriately.</li></ul><p>With these we can call <code>Flux.train!</code>:</p><pre><code class="language-julia">Flux.train!(objective, params, data, opt)</code></pre><p>There are plenty of examples in the <a href="https://github.com/FluxML/model-zoo">model zoo</a>.</p><h2><a class="nav-anchor" id="Loss-Functions-1" 
href="#Loss-Functions-1">Loss Functions</a></h2><p>The objective function must return a number representing how far the model is from its target – the <em>loss</em> of the model. The <code>loss</code> function that we defined in <a href="../../models/basics/">basics</a> will work as an objective. We can also define an objective in terms of some model:</p><pre><code class="language-julia">m = Chain(
  Dense(784, 32, σ),
  Dense(32, 10), softmax)

loss(x, y) = Flux.mse(m(x), y)
ps = Flux.params(m)

# later
Flux.train!(loss, ps, data, opt)</code></pre><p>The objective will almost always be defined in terms of some <em>cost function</em> that measures the distance of the prediction <code>m(x)</code> from the target <code>y</code>. Flux has several of these built in, like <code>mse</code> for mean squared error or <code>crossentropy</code> for cross entropy loss, but you can calculate it however you want.</p><p>At first glance it may seem strange that the model that we want to train is not part of the input arguments of <code>Flux.train!</code> too. However, the target of the optimiser is not the model itself, but the objective function that represents the departure between modelled and observed data. In other words, the model is implicitly defined in the objective function, and there is no need to give it explicitly. Passing the objective function instead of the model and a cost function separately provides more flexibility, and the possibility of optimising the calculations.</p><h2><a class="nav-anchor" id="Model-parameters-1" href="#Model-parameters-1">Model parameters</a></h2><p>The model to be trained must have a set of tracked parameters that are used to calculate the gradients of the objective function. In the <a href="../../models/basics/">basics</a> section it is explained how to create models with such parameters. The second argument of the function <code>Flux.train!</code> must be an object containing those parameters, which can be obtained from a model <code>m</code> as <code>params(m)</code>.</p><p>Such an object contains a reference to the model's parameters, not a copy, so that after training, the model behaves according to the updated values.</p><h2><a class="nav-anchor" id="Datasets-1" href="#Datasets-1">Datasets</a></h2><p>The <code>data</code> argument provides a collection of data to train with (usually a set of inputs <code>x</code> and target outputs <code>y</code>). For example, here's a dummy data set with only one data point:</p><pre><code class="language-julia">x = rand(784)
y = rand(10)
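# A data set consisting of a single (input, target) pair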
data = [(x, y)]</code></pre><p><code>Flux.train!</code> will call <code>loss(x, y)</code>, calculate gradients, update the weights and then move on to the next data point if there is one. We can train the model on the same data three times:</p><pre><code class="language-julia">data = [(x, y), (x, y), (x, y)]
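# Three copies of the same pair: the model sees (x, y) three times per epoch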
# Or equivalently
data = Iterators.repeated((x, y), 3)</code></pre><p>It's common to load the <code>x</code>s and <code>y</code>s separately. In this case you can use <code>zip</code>:</p><pre><code class="language-julia">xs = [rand(784), rand(784), rand(784)]
ys = [rand( 10), rand( 10), rand( 10)]
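# zip pairs each input with its corresponding target, yielding (x, y) tuples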
data = zip(xs, ys)</code></pre><p>Note that, by default, <code>train!</code> only loops over the data once (a single "epoch"). A convenient way to run multiple epochs from the REPL is provided by <code>@epochs</code>.</p><pre><code class="language-julia">julia> using Flux: @epochs

julia> @epochs 2 println("hello")
INFO: Epoch 1
hello
INFO: Epoch 2
hello

julia> @epochs 2 Flux.train!(...)
# Train for two epochs</code></pre><h2><a class="nav-anchor" id="Callbacks-1" href="#Callbacks-1">Callbacks</a></h2><p><code>train!</code> takes an additional argument, <code>cb</code>, that's used for callbacks so that you can observe the training process. For example:</p><pre><code class="language-julia">train!(objective, ps, data, opt, cb = () -> println("training"))</code></pre><p>Callbacks are called for every batch of training data. You can slow this down using <code>Flux.throttle(f, timeout)</code> which prevents <code>f</code> from being called more than once every <code>timeout</code> seconds.</p><p>A more typical callback might look like this:</p><pre><code class="language-julia">test_x, test_y = # ... create single batch of test data ...
evalcb() = @show(loss(test_x, test_y))

Flux.train!(objective, ps, data, opt,
cb = throttle(evalcb, 5))</code></pre><p>Calling <code>Flux.stop()</code> in a callback will exit the training loop early.</p><pre><code class="language-julia">cb = function ()
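  # Assumes an accuracy() function is defined; halt training once it exceeds 0.9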
  accuracy() > 0.9 && Flux.stop()
end</code></pre><footer><hr/><a class="previous" href="../optimisers/"><span class="direction">Previous</span><span class="title">Optimisers</span></a><a class="next" href="../../data/onehot/"><span class="direction">Next</span><span class="title">One-Hot Encoding</span></a></footer></article></body></html>
@ -1,5 +1,6 @@
var DOC_VERSIONS = [
  "stable",
  "v0.10",
  "v0.9",
  "v0.8",
  "v0.7",