2017-09-08 21:52:41 +00:00
|
|
|
|
"""
    Chain(layers...)

Chain multiple layers / functions together, so that they are called in sequence
on a given input.

`Chain` also supports indexing and slicing, e.g. `m[2]` or `m[1:end-1]`.
`m[1:3](x)` will calculate the output of the first three layers.

# Examples
```jldoctest
julia> m = Chain(x -> x^2, x -> x+1);

julia> m(5) == 26
true

julia> m = Chain(Dense(10, 5), Dense(5, 2));

julia> x = rand(10);

julia> m(x) == m[2](m[1](x))
true
```
"""
struct Chain{T<:Tuple}
  layers::T  # heterogeneous tuple of layers/functions, applied in order
  # Inner constructor: capture the callables as a tuple so the layer types
  # are part of the Chain's type (enables full specialization).
  Chain(xs...) = new{typeof(xs)}(xs)
end
|
|
|
|
|
|
2019-01-16 14:51:37 +00:00
|
|
|
|
# Delegate collection-style operations (indexing, length, first/last,
# iteration, lastindex) on a Chain straight to its layer tuple.
@forward Chain.layers Base.getindex, Base.length, Base.first, Base.last,
  Base.iterate, Base.lastindex
|
2016-08-25 21:49:21 +00:00
|
|
|
|
|
2020-04-14 14:21:45 +00:00
|
|
|
|
functor(::Type{<:Chain}, c) = c.layers, ls -> Chain(ls...)
|
2017-08-22 16:13:03 +00:00
|
|
|
|
|
2018-11-16 12:22:15 +00:00
|
|
|
|
# Recursively thread `x` through a tuple of callables; the empty tuple is the
# base case and returns the accumulated value unchanged. Recursion over the
# tuple keeps every layer call fully type-specialized.
applychain(::Tuple{}, x) = x
applychain(layers::Tuple, x) = applychain(tail(layers), first(layers)(x))
|
|
|
|
|
|
|
|
|
|
(c::Chain)(x) = applychain(c.layers, x)
|
2017-06-12 11:39:34 +00:00
|
|
|
|
|
2017-02-28 16:42:48 +00:00
|
|
|
|
Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...)
|
2017-08-19 19:52:29 +00:00
|
|
|
|
|
2020-02-29 22:09:59 +00:00
|
|
|
|
testmode!(m::Chain, mode = true) = (map(x -> testmode!(x, mode), m.layers); m)
|
2020-02-21 21:10:28 +00:00
|
|
|
|
|
2017-08-21 16:20:09 +00:00
|
|
|
|
# Display a Chain as `Chain(layer1, layer2, ...)`.
function Base.show(io::IO, c::Chain)
  print(io, "Chain(")
  for (i, layer) in enumerate(c.layers)
    i == 1 || print(io, ", ")
    print(io, layer)
  end
  print(io, ")")
end
|
|
|
|
|
|
2019-12-04 04:48:48 +00:00
|
|
|
|
"""
    outdims(c::Chain, isize)

Calculate the output dimensions given the input dimensions, `isize`.

```julia
m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32))
outdims(m, (10, 10)) == (6, 6)
```
"""
# Fold the size through the layers in order. The previous implementation
# built `foldl(∘, ...)` over the per-layer closures, but `f1 ∘ f2` applies
# `f2` first, so sizes were threaded through the layers in REVERSE order.
outdims(c::Chain, isize) = foldl((sz, layer) -> outdims(layer, sz), c.layers; init = isize)
|
2019-03-28 09:07:04 +00:00
|
|
|
|
|
2019-04-05 11:19:30 +00:00
|
|
|
|
# This is a temporary and naive implementation
|
|
|
|
|
# it might be replaced in the future for better performance
|
2019-04-05 10:16:44 +00:00
|
|
|
|
# see issue https://github.com/FluxML/Flux.jl/issues/702
|
2019-04-05 10:44:00 +00:00
|
|
|
|
# Johnny Chen -- @johnnychen94
|
2019-09-10 07:54:49 +00:00
|
|
|
|
# only slightly changed to better handle interaction with Zygote @dsweber2
|
2019-03-28 09:07:04 +00:00
|
|
|
|
# This is a temporary and naive implementation;
# it might be replaced in the future for better performance
# (see https://github.com/FluxML/Flux.jl/issues/702).
"""
    activations(c::Chain, input)

Calculate the forward results of each layer in Chain `c` with `input` as model input.
"""
activations(c::Chain, input) = extraChain(c.layers, input)
|
|
|
|
|
|
2019-09-12 00:36:37 +00:00
|
|
|
|
# Like `applychain`, but collects every intermediate layer output into a
# tuple (used by `activations`).
function extraChain(fs::Tuple, x)
  out = first(fs)(x)
  return (out, extraChain(Base.tail(fs), out)...)
end

# Base case: no layers left, no further activations to record.
extraChain(::Tuple{}, x) = ()
|
2019-09-12 00:36:37 +00:00
|
|
|
|
|
|
|
|
|
|
2018-06-26 13:30:46 +00:00
|
|
|
|
|
2017-09-08 21:52:41 +00:00
|
|
|
|
"""
    Dense(in::Integer, out::Integer, σ = identity)

Create a traditional `Dense` layer with parameters `W` and `b`.

    y = σ.(W * x .+ b)

The input `x` must be a vector of length `in`, or a batch of vectors represented
as an `in × N` matrix. The out `y` will be a vector or batch of length `out`.

# Examples
```jldoctest; setup = :(using Random; Random.seed!(0))
julia> d = Dense(5, 2)
Dense(5, 2)

julia> d(rand(5))
2-element Array{Float32,1}:
 -0.16210233
  0.12311903
```
"""
struct Dense{F,S<:AbstractArray,T<:AbstractArray}
  W::S  # weight array (out × in)
  b::T  # bias array (length out)
  σ::F  # element-wise activation, broadcast over the affine output
end
|
|
|
|
|
|
2018-02-15 20:52:29 +00:00
|
|
|
|
Dense(W, b) = Dense(W, b, identity)
|
|
|
|
|
|
2017-12-05 07:47:03 +00:00
|
|
|
|
# Build a Dense layer of the given size, drawing initial weights from `initW`
# and initial biases from `initb`.
# NOTE(review): `glorot_uniform` appears to produce Float32 weights while the
# default `zeros` produces a Float64 bias — confirm the mixed eltype is intended.
Dense(in::Integer, out::Integer, σ = identity;
      initW = glorot_uniform, initb = zeros) =
  Dense(initW(out, in), initb(out), σ)
|
2017-08-19 19:52:29 +00:00
|
|
|
|
|
2019-09-19 14:53:31 +00:00
|
|
|
|
@functor Dense
|
2017-08-22 16:13:03 +00:00
|
|
|
|
|
2018-08-23 13:34:11 +00:00
|
|
|
|
# Forward pass: affine transform followed by the broadcast activation.
function (a::Dense)(x::AbstractArray)
  return a.σ.(a.W * x .+ a.b)
end
|
2017-08-21 16:20:09 +00:00
|
|
|
|
|
2017-09-02 20:50:11 +00:00
|
|
|
|
# Display as `Dense(in, out)` or `Dense(in, out, σ)` when a non-identity
# activation is set. `size(W, 2)` is `in`, `size(W, 1)` is `out`.
function Base.show(io::IO, l::Dense)
  print(io, "Dense(", size(l.W, 2), ", ", size(l.W, 1))
  if l.σ != identity
    print(io, ", ", l.σ)
  end
  print(io, ")")
end
|
2017-10-10 20:33:37 +00:00
|
|
|
|
|
2019-02-27 11:46:20 +00:00
|
|
|
|
# Try to avoid hitting generic matmul in some simple cases
# Base's matmul is so slow that it's worth the extra conversion to hit BLAS
# When the weight eltype `T` (Float32/Float64) matches the input eltype,
# dispatch straight to the generic method — `W * x` then hits BLAS.
(a::Dense{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  invoke(a, Tuple{AbstractArray}, x)

# When the input is some other AbstractFloat eltype, convert it to the weight
# eltype `T` first so the call lands on the BLAS-friendly method above.
(a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  a(T.(x))
|
|
|
|
|
|
2019-12-04 04:48:48 +00:00
|
|
|
|
"""
    outdims(l::Dense, isize)

Calculate the output dimensions given the input dimensions, `isize`.

```julia
m = Dense(10, 5)
outdims(m, (5, 2)) == (5,)
outdims(m, (10,)) == (5,)
```
"""
outdims(l::Dense, isize) = (size(l.W, 1),)
|
2019-12-04 04:48:48 +00:00
|
|
|
|
|
2017-10-10 20:33:37 +00:00
|
|
|
|
"""
    Diagonal(in::Integer)

Create an element-wise linear transformation layer with learnable
vectors `α` and `β`:

    y = α .* x .+ β

The input `x` must be an array where `size(x, 1) == in`.
"""
struct Diagonal{T}
  α::T  # element-wise scale
  β::T  # element-wise shift
end
|
|
|
|
|
|
2018-07-17 15:13:55 +00:00
|
|
|
|
# Size-based convenience constructor: α initialised to ones, β to zeros.
Diagonal(in::Integer; initα = ones, initβ = zeros) = Diagonal(initα(in), initβ(in))
|
2017-10-10 20:33:37 +00:00
|
|
|
|
|
2019-09-19 14:53:31 +00:00
|
|
|
|
@functor Diagonal
|
2017-10-10 20:33:37 +00:00
|
|
|
|
|
2017-10-23 11:53:07 +00:00
|
|
|
|
# Forward pass: element-wise affine transform `α .* x .+ β`.
(a::Diagonal)(x) = a.α .* x .+ a.β
|
|
|
|
|
|
2017-10-23 11:53:07 +00:00
|
|
|
|
# Display as `Diagonal(in)`, where `in` is the length of the scale vector.
Base.show(io::IO, l::Diagonal) = print(io, "Diagonal(", length(l.α), ")")
|
2018-09-07 00:25:32 +00:00
|
|
|
|
|
2019-12-04 04:48:48 +00:00
|
|
|
|
outdims(l::Diagonal, isize) = (length(l.α),)
|
2019-02-27 12:04:59 +00:00
|
|
|
|
|
|
|
|
|
"""
    Maxout(over)

The [Maxout](https://arxiv.org/pdf/1302.4389.pdf) layer has a number of
internal layers which all receive the same input. It returns the elementwise
maximum of the internal layers' outputs.

Maxout over linear dense layers satisfies the universal approximation theorem.
"""
struct Maxout{FS<:Tuple}
  over::FS  # tuple of internal layers, all applied to the same input
end

"""
    Maxout(f, n_alts)

Construct a Maxout layer over `n_alts` instances of the layer given by `f`.
The function takes no arguments and should return some callable layer.
Conventionally, this is a linear dense layer.

# Examples

This constructs a `Maxout` layer over 4 internal dense linear layers, each
identical in structure (784 inputs, 128 outputs):
```julia
insize = 784
outsize = 128
Maxout(()->Dense(insize, outsize), 4)
```
"""
function Maxout(f, n_alts)
  # Call `f` once per alternative; each call builds a fresh internal layer.
  over = Tuple(f() for _ in 1:n_alts)
  return Maxout(over)
end
|
|
|
|
|
|
2019-09-19 14:53:31 +00:00
|
|
|
|
@functor Maxout
|
2019-03-25 16:02:46 +00:00
|
|
|
|
|
2019-03-06 18:22:46 +00:00
|
|
|
|
# Apply every internal layer to the same input, then take the element-wise
# maximum across their outputs.
function (mo::Maxout)(input::AbstractArray)
  outs = map(f -> f(input), mo.over)
  return reduce((acc, out) -> max.(acc, out), outs)
end
|
2019-06-10 12:54:18 +00:00
|
|
|
|
|
2019-12-06 04:54:25 +00:00
|
|
|
|
outdims(l::Maxout, isize) = outdims(first(l.over), isize)
|
2019-12-06 03:57:10 +00:00
|
|
|
|
|
2018-10-20 19:36:16 +00:00
|
|
|
|
"""
    SkipConnection(layer, connection)

Create a skip connection which consists of a layer or `Chain` of consecutive
layers and a shortcut connection linking the block's input to the output
through a user-supplied 2-argument callable. The first argument to the callable
will be propagated through the given `layer` while the second is the unchanged,
"skipped" input.

The simplest "ResNet"-type connection is just `SkipConnection(layer, +)`,
and requires the output of the layers to be the same shape as the input.
Here is a more complicated example:
```julia
m = Conv((3,3), 4=>7, pad=(1,1))
x = ones(5,5,4,10);
size(m(x)) == (5, 5, 7, 10)

sm = SkipConnection(m, (mx, x) -> cat(mx, x, dims=3))
size(sm(x)) == (5, 5, 11, 10)
```
"""
struct SkipConnection
  layers      # layer (or Chain) the input is propagated through
  connection  #user can pass arbitrary connections here, such as (a,b) -> a + b
end
|
|
|
|
|
|
2019-09-19 14:53:31 +00:00
|
|
|
|
@functor SkipConnection
|
2018-10-20 19:36:16 +00:00
|
|
|
|
|
2019-05-13 16:47:46 +00:00
|
|
|
|
# Combine the transformed input with the raw ("skipped") input via `connection`.
(skip::SkipConnection)(input) = skip.connection(skip.layers(input), input)
|
|
|
|
|
|
|
|
|
|
# Display as `SkipConnection(layers, connection)`.
function Base.show(io::IO, b::SkipConnection)
  print(io, "SkipConnection(")
  print(io, b.layers)
  print(io, ", ")
  print(io, b.connection)
  print(io, ")")
end
|