# Flux.jl/src/layers/basic.jl

"""
Chain(layers...)
2017-08-19 19:52:29 +00:00
2017-09-08 21:52:41 +00:00
Chain multiple layers / functions together, so that they are called in sequence
on a given input.
2017-10-18 14:44:06 +00:00
```julia
m = Chain(x -> x^2, x -> x+1)
m(5) == 26
2017-09-08 21:52:41 +00:00
2017-10-18 14:44:06 +00:00
m = Chain(Dense(10, 5), Dense(5, 2))
x = rand(10)
m(x) == m[2](m[1](x))
```
2017-09-08 21:52:41 +00:00
`Chain` also supports indexing and slicing, e.g. `m[2]` or `m[1:end-1]`.
2017-09-10 00:02:48 +00:00
`m[1:3](x)` will calculate the output of the first three layers.
2017-09-08 21:52:41 +00:00
"""
struct Chain{T<:Tuple}
  layers::T
  Chain(xs...) = new{typeof(xs)}(xs)
end

@forward Chain.layers Base.getindex, Base.length, Base.first, Base.last,
  Base.iterate, Base.lastindex

children(c::Chain) = c.layers
mapchildren(f, c::Chain) = Chain(f.(c.layers)...)
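# The `children`/`mapchildren` hooks above are what let Flux's generic utilities
# traverse a `Chain`'s sub-layers. A rough usage sketch (assuming the standard
# Flux API of this version, e.g. `Flux.params` and `gpu`):
#
#   m = Chain(Dense(10, 5), Dense(5, 2))
#   Flux.params(m)       # collects W and b from both Dense layers
#   mapchildren(gpu, m)  # rebuilds the Chain with every layer moved to the GPU
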
applychain(::Tuple{}, x) = x
applychain(fs::Tuple, x) = applychain(tail(fs), first(fs)(x))
(c::Chain)(x) = applychain(c.layers, x)
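# `applychain` above peels off one layer at a time, so a call unrolls into nested
# applications. An illustrative expansion (not actual code):
#
#   c = Chain(f, g, h)
#   c(x)  ->  applychain((f, g, h), x)
#         ->  applychain((g, h), f(x))
#         ->  applychain((h,), g(f(x)))
#         ->  h(g(f(x)))
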
Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...)

function Base.show(io::IO, c::Chain)
  print(io, "Chain(")
  join(io, c.layers, ", ")
  print(io, ")")
end

activations(c::Chain, x) = accumulate((x, m) -> m(x), c.layers, init = x)
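# `activations` above collects the output of every layer in turn, which is useful
# for inspecting intermediate representations. A minimal sketch (illustrative sizes):
#
#   m = Chain(Dense(10, 5, relu), Dense(5, 2))
#   x = rand(10)
#   as = activations(m, x)  # one entry per layer; as[end] == m(x)
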
"""
Dense(in::Integer, out::Integer, σ = identity)
Creates a traditional `Dense` layer with parameters `W` and `b`.
2017-08-19 19:52:29 +00:00
2017-09-08 21:52:41 +00:00
y = σ.(W * x .+ b)
2017-09-09 23:58:32 +00:00
The input `x` must be a vector of length `in`, or a batch of vectors represented
2017-10-18 11:48:58 +00:00
as an `in × N` matrix. The out `y` will be a vector or batch of length `out`.
2017-10-18 11:47:45 +00:00
2017-10-18 14:44:06 +00:00
```julia
julia> d = Dense(5, 2)
Dense(5, 2)
2017-10-18 11:47:45 +00:00
2017-10-18 14:44:06 +00:00
julia> d(rand(5))
Tracked 2-element Array{Float64,1}:
0.00257447
-0.00449443
```
2017-09-08 21:52:41 +00:00
"""
struct Dense{F,S,T}
  W::S
  b::T
  σ::F
end

Dense(W, b) = Dense(W, b, identity)

function Dense(in::Integer, out::Integer, σ = identity;
               initW = glorot_uniform, initb = zeros)
  return Dense(param(initW(out, in)), param(initb(out)), σ)
end
@treelike Dense

function (a::Dense)(x::AbstractArray)
  W, b, σ = a.W, a.b, a.σ
  σ.(W*x .+ b)
end
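
# Since the forward pass is just `σ.(W*x .+ b)`, a batch can be passed as a matrix
# with one sample per column. Illustrative sketch (sizes are arbitrary):
#
#   d = Dense(10, 5, relu)
#   xs = rand(10, 64)  # 64 samples of length 10
#   d(xs)              # 5×64 output, one column per sample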

function Base.show(io::IO, l::Dense)
  print(io, "Dense(", size(l.W, 2), ", ", size(l.W, 1))
  l.σ == identity || print(io, ", ", l.σ)
  print(io, ")")
end

# Try to avoid hitting generic matmul in some simple cases
# Base's matmul is so slow that it's worth the extra conversion to hit BLAS
(a::Dense{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  invoke(a, Tuple{AbstractArray}, x)

(a::Dense{<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  a(T.(x))
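
# Concretely: if `a.W` is a Float32 matrix and `x` has some other Real eltype
# (e.g. Int or Float64), the second method converts `x` with `T.(x)` so that the
# multiplication hits the BLAS path instead of generic matmul.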
"""
2017-10-23 11:53:07 +00:00
Diagonal(in::Integer)
2017-10-10 20:33:37 +00:00
Creates an element-wise linear transformation layer with learnable
2017-11-21 16:04:04 +00:00
vectors `α` and `β`:
2017-10-10 20:33:37 +00:00
2017-11-21 16:04:04 +00:00
y = α .* x .+ β
2017-10-10 20:33:37 +00:00
2017-10-23 11:53:07 +00:00
The input `x` must be a array where `size(x, 1) == in`.
2017-10-10 20:33:37 +00:00
"""
struct Diagonal{T}
  α::T
  β::T
end

Diagonal(in::Integer; initα = ones, initβ = zeros) =
  Diagonal(param(initα(in)), param(initβ(in)))
@treelike Diagonal

function (a::Diagonal)(x)
  α, β = a.α, a.β
  α.*x .+ β
end
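
# A minimal usage sketch (illustrative values):
#
#   d = Diagonal(5)
#   x = rand(5)
#   d(x)  # == d.α .* x .+ d.β, an elementwise scale-and-shift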

function Base.show(io::IO, l::Diagonal)
  print(io, "Diagonal(", length(l.α), ")")
end
"""
MaxOut(over)
MaxOut is a neural network layer, which has a number of internal layers,
which all have the same input, and the max out returns the elementwise maximium
of the internal layers' outputs.
Maxout over linear dense layers satisfies the univeral approximation theorem.
Reference:
Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron Courville, and Yoshua Bengio.
2013. Maxout networks.
In Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28 (ICML'13),
Sanjoy Dasgupta and David McAllester (Eds.), Vol. 28. JMLR.org III-1319-III-1327.
https://arxiv.org/pdf/1302.4389.pdf
"""
struct MaxOut{FS<:Tuple}
  over::FS
end
"""
MaxOut(f, n_alts, args...; kwargs...)
Constructs a MaxOut layer over `n_alts` instances of the layer given by `f`.
All other arguements (`args` & `kwargs`) are passed to the constructor `f`.
For example the followeExample usage
will construct a MaxOut layer over 4 dense linear layers,
each identical in structure (784 inputs, 128 outputs).
```julia
insize = 784
outsie = 128
MaxOut(Dense, 4, insize, outsize)
```
"""
function MaxOut(f, n_alts, args...; kwargs...)
  over = Tuple(f(args...; kwargs...) for _ in 1:n_alts)
  return MaxOut(over)
end
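
# A rough usage sketch (illustrative sizes):
#
#   mo = MaxOut(Dense, 4, 10, 5)  # maxout over four Dense(10, 5) layers
#   x = rand(10)
#   mo(x)                         # elementwise max of the four layers' outputs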
function (mo::MaxOut)(input::AbstractArray)
  mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over)
end