Flux.jl/src/layers/basic.jl

"""
    Chain(layers...)

Chain multiple layers / functions together, so that they are called in sequence
on a given input.

```julia
m = Chain(x -> x^2, x -> x+1)
m(5) == 26

m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))
```

`Chain` also supports indexing and slicing, e.g. `m[2]` or `m[1:end-1]`.
`m[1:3](x)` will calculate the output of the first three layers.
"""
struct Chain{T<:Tuple}
  layers::T
  Chain(xs...) = new{typeof(xs)}(xs)
end

@forward Chain.layers Base.getindex, Base.length, Base.first, Base.last,
  Base.iterate, Base.lastindex

functor(c::Chain) = c.layers, ls -> Chain(ls...)
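# Apply the first layer and recurse on the rest; recursing over the layer tuple
# keeps the chain's forward pass type-stable for a fixed set of layer types.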
applychain(::Tuple{}, x) = x
applychain(fs::Tuple, x) = applychain(tail(fs), first(fs)(x))
(c::Chain)(x) = applychain(c.layers, x)
Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...)
function Base.show(io::IO, c::Chain)
print(io, "Chain(")
join(io, c.layers, ", ")
print(io, ")")
end

# This is a temporary and naive implementation
# it might be replaced in the future for better performance
# see issue https://github.com/FluxML/Flux.jl/issues/702
# Johnny Chen -- @johnnychen94
"""
    activations(c::Chain, input)

Calculate the forward results of each layer in Chain `c` with `input` as model input.
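
For example (an illustrative sketch; the exact values depend on the random initialisation):

```julia
m = Chain(Dense(10, 5, relu), Dense(5, 2), softmax)
x = rand(10)
acts = activations(m, x)
length(acts) == 3    # one entry per layer
acts[end] == m(x)    # the last entry is the full forward pass
```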
"""
function activations(c::Chain, input)
  rst = []
  for l in c
    # feed the previous layer's output, or `input` for the first layer
    x = get(rst, length(rst), input)
    push!(rst, l(x))
  end
  return rst
end
"""
    Dense(in::Integer, out::Integer, σ = identity)

Creates a traditional `Dense` layer with parameters `W` and `b`.

    y = σ.(W * x .+ b)

The input `x` must be a vector of length `in`, or a batch of vectors represented
as an `in × N` matrix. The output `y` will be a vector or batch of length `out`.

```julia
julia> d = Dense(5, 2)
Dense(5, 2)

julia> d(rand(5))
Tracked 2-element Array{Float64,1}:
  0.00257447
 -0.00449443
```
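
The same layer applied to a batch of inputs, one column per sample (illustrative):

```julia
julia> size(d(rand(5, 64)))
(2, 64)
```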
"""
struct Dense{F,S,T}
  W::S
  b::T
  σ::F
end
Dense(W, b) = Dense(W, b, identity)
function Dense(in::Integer, out::Integer, σ = identity;
               initW = glorot_uniform, initb = zeros)
  return Dense(initW(out, in), initb(out), σ)
end
@functor Dense
function (a::Dense)(x::AbstractArray)
  W, b, σ = a.W, a.b, a.σ
  σ.(W*x .+ b)
end
function Base.show(io::IO, l::Dense)
print(io, "Dense(", size(l.W, 2), ", ", size(l.W, 1))
2017-08-21 16:20:09 +00:00
l.σ == identity || print(io, ", ", l.σ)
print(io, ")")
end
# Try to avoid hitting generic matmul in some simple cases
# Base's matmul is so slow that it's worth the extra conversion to hit BLAS
(a::Dense{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  invoke(a, Tuple{AbstractArray}, x)

# Convert the input to the weights' element type so the BLAS path above is taken
(a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  a(T.(x))
"""
    Diagonal(in::Integer)

Creates an element-wise linear transformation layer with learnable
vectors `α` and `β`:

    y = α .* x .+ β

The input `x` must be an array where `size(x, 1) == in`.
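
For example, with the default initialisation (`initα = ones`, `initβ = zeros`):

```julia
d = Diagonal(2)
d([1, 10]) == [1.0, 10.0]
```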
"""
struct Diagonal{T}
  α::T
  β::T
end
Diagonal(in::Integer; initα = ones, initβ = zeros) =
  Diagonal(initα(in), initβ(in))
@functor Diagonal
function (a::Diagonal)(x)
  α, β = a.α, a.β
  α.*x .+ β
end
function Base.show(io::IO, l::Diagonal)
print(io, "Diagonal(", length(l.α), ")")
2017-10-10 20:33:37 +00:00
end
"""
    Maxout(over)

`Maxout` is a neural network layer that has a number of internal layers,
which all receive the same input. The layer returns the elementwise maximum
of the internal layers' outputs.

Maxout over linear dense layers satisfies the universal approximation theorem.
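
For example (an illustrative sketch using plain functions as the internal layers):

```julia
mo = Maxout((x -> x, x -> 2x, x -> 0.5x))
mo([1, 2]) == [2.0, 4.0]    # elementwise max over the three branches
```
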
Reference:
Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron Courville, and Yoshua Bengio.
2013. Maxout networks.
In Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28 (ICML'13),
Sanjoy Dasgupta and David McAllester (Eds.), Vol. 28. JMLR.org III-1319-III-1327.
https://arxiv.org/pdf/1302.4389.pdf
"""
struct Maxout{FS<:Tuple}
  over::FS
end

"""
    Maxout(f, n_alts)

Constructs a Maxout layer over `n_alts` instances of the layer given by `f`.
The function takes no argument and should return some callable layer.
Conventionally this is a linear dense layer.

For example, the following constructs a `Maxout` layer over 4 internal dense
linear layers, each identical in structure (784 inputs, 128 outputs):
```julia
insize = 784
outsize = 128
Maxout(()->Dense(insize, outsize), 4)
```
"""
function Maxout(f, n_alts)
  over = Tuple(f() for _ in 1:n_alts)
  return Maxout(over)
end
@functor Maxout
function (mo::Maxout)(input::AbstractArray)
  mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over)
end
"""
    SkipConnection(layers, connection)

Creates a skip connection, consisting of a layer or `Chain` of consecutive layers
and a shortcut connection. The `connection` function combines the output of the
layers with the original input to give the final output.

The simplest 'ResNet'-type connection is just `SkipConnection(layer, +)`,
and requires the output of the layers to be the same shape as the input.
Here is a more complicated example:
```julia
m = Conv((3,3), 4=>7, pad=(1,1))
x = ones(5,5,4,10);
size(m(x)) == (5, 5, 7, 10)

sm = SkipConnection(m, (mx, x) -> cat(mx, x, dims=3))
size(sm(x)) == (5, 5, 11, 10)
```
"""
struct SkipConnection
  layers
  connection  #user can pass arbitrary connections here, such as (a,b) -> a + b
end
@functor SkipConnection

function (skip::SkipConnection)(input)
  skip.connection(skip.layers(input), input)
end

function Base.show(io::IO, b::SkipConnection)
  print(io, "SkipConnection(", b.layers, ", ", b.connection, ")")
end