Merge remote-tracking branch 'origin/master' into HEAD

janEbert 2019-08-24 12:23:10 +02:00
commit dec1b37e8e
6 changed files with 21 additions and 9 deletions

View File

@@ -14,8 +14,8 @@ Which means allocations occur much faster.
And you use less memory.
-## Make sure your custom activation functions preserve the type of their inputs
+## Make sure your activation and loss functions preserve the type of their inputs
-Not only should your activation functions be [type-stable](https://docs.julialang.org/en/v1/manual/performance-tips/#Write-%22type-stable%22-functions-1),
+Not only should your activation and loss functions be [type-stable](https://docs.julialang.org/en/v1/manual/performance-tips/#Write-%22type-stable%22-functions-1),
they should also preserve the type of their inputs.
A very artificial example using an activation function like
@@ -26,6 +26,7 @@ A very artificial example using an activation function like
will result in performance on `Float32` input orders of magnitude slower than the normal `tanh` would,
because it results in having to use slow mixed type multiplication in the dense layers.
+Similar situations can occur in the loss function during backpropagation.
Which means if you change your data say from `Float64` to `Float32` (which should give a speedup: see above),
you will see a large slow-down
@@ -41,7 +42,7 @@ While one could change your activation function (e.g. to use `0.01f0x`) to avoid
the idiomatic (and safe way) is to use `oftype`.
```
-leaky_tanh(x) = oftype(x/1, 0.01) + tanh(x)
+leaky_tanh(x) = oftype(x/1, 0.01)x + tanh(x)
```
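To make the point above concrete (this example is not part of the commit; the function names `bad_leaky_tanh` and `good_leaky_tanh` are invented for illustration), compare an activation that hard-codes a `Float64` constant with the `oftype` version on `Float32` input:

```julia
# Sketch only, not from the diff: how a Float64 constant leaks into Float32 data.
bad_leaky_tanh(x)  = 0.01 * x + tanh(x)                # 0.01 is a Float64 literal
good_leaky_tanh(x) = oftype(x/1, 0.01) * x + tanh(x)   # constant converted to x's float type

x = rand(Float32, 100)

eltype(bad_leaky_tanh.(x))   # Float64 -- the Float32 input was silently promoted
eltype(good_leaky_tanh.(x))  # Float32 -- element type preserved
```

Inside a `Dense` layer, promoted `Float64` activations force exactly the mixed-type multiplications the paragraph above warns about.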
@@ -60,7 +61,7 @@ end
It is much faster to concatenate them into a matrix,
as this will hit BLAS matrix-matrix multiplication, which is much faster than the equivalent sequence of matrix-vector multiplications.
-Even though this means allocating new memory to store them contiguously.
+The improvement is enough that it is worthwhile allocating new memory to store them contiguously.
```julia
x_batch = reduce(hcat, xs)
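# Illustration, not part of this commit: a single BLAS matrix-matrix multiply
# replaces many matrix-vector multiplies. `W` and `xs` are assumed names used
# only for this sketch.
W  = rand(Float32, 5, 10)                  # stand-in for a layer's weight matrix
xs = [rand(Float32, 10) for _ in 1:1000]   # many individual input vectors
ys_loop  = [W * x for x in xs]             # 1000 separate matrix-vector products
x_batch  = reduce(hcat, xs)                # one contiguous 10×1000 matrix
ys_batch = W * x_batch                     # a single 5×1000 matrix-matrix product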

View File

@@ -64,7 +64,7 @@ function RNNDesc{T}(mode::Int, input::Int, hidden::Int; layers = 1) where T
  @check ccall((:cudnnSetRNNDescriptor_v6,libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Cint,Cint,Ptr{Nothing},Cint,Cint,Cint,Cint,Cint),
    handle(),d[],hidden,layers,dropoutDesc,inputMode,direction,mode,algo,cudnnDataType(T))
-  w = cuzeros(T, rnnParamSize(T, d[], input))
+  w = CuArrays.zeros(T, rnnParamSize(T, d[], input))
  # TODO: avoid reserve allocation here
  rd = RNNDesc{T}(mode, input, hidden, w, params(w, input, hidden, ngates(mode))..., d[])
  finalizer(rd) do x
@@ -131,8 +131,8 @@ end
# TODO: can we just manipulate strides here?
# TODO: should use repmat, but this isn't implemented.
hBatch(x::AbstractVector, h::CuVector) = h
-hBatch(x::AbstractMatrix, h::CuVector) = h .* cuones(1, size(x, 2))
+hBatch(x::AbstractMatrix, h::CuVector) = h .* CuArrays.ones(1, size(x, 2))
-hBatch(x::AbstractMatrix, h::CuMatrix) = h .* cuones(1, size(h,2) == 1 ? size(x,2) : 1)
+hBatch(x::AbstractMatrix, h::CuMatrix) = h .* CuArrays.ones(1, size(h,2) == 1 ? size(x,2) : 1)

function forward(rnn::RNNDesc{T}, x::CuArray{T}, h_::CuArray{T}, c_ = nothing, train = Val{false}) where T
  h = hBatch(x, h_)

View File

@@ -110,7 +110,7 @@ end
(a::Dense{<:Any,W})(x::AbstractArray{T}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  invoke(a, Tuple{AbstractArray}, x)

-(a::Dense{<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
+(a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
  a(T.(x))

"""

View File

@@ -23,7 +23,7 @@ function apply!(o::Descent, x, Δ)
end

"""
-    Momentum(params, η = 0.01; ρ = 0.9)
+    Momentum(η = 0.01; ρ = 0.9)

Gradient descent with learning rate `η` and momentum `ρ`.
"""

View File

@@ -31,6 +31,7 @@ end
function prefor(f, x; seen = IdSet())
  x ∈ seen && return
+  push!(seen, x)
  f(x)
  foreach(x -> prefor(f, x, seen = seen), children(x))
  return

View File

@@ -85,6 +85,16 @@
  @test size.(params(m)) == [(5, 10), (5,)]
  m = RNN(10, 5)
  @test size.(params(m)) == [(5, 10), (5, 5), (5,), (5,)]
+
+  # Layer duplicated in same chain, params just once pls.
+  c = Chain(m, m)
+  @test size.(params(c)) == [(5, 10), (5, 5), (5,), (5,)]
+
+  # Self-referential array. Just want params, no stack overflow pls.
+  r = Any[nothing,m]
+  Flux.children(a::Vector{Any}) = Tuple(a)
+  r[1] = r
+  @test size.(params(r)) == [(5, 10), (5, 5), (5,), (5,)]
end

@testset "Basic Stacking" begin