From a8ccc79f61e81d38d3235e53650fe9466693cbf9 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Mon, 30 Jul 2018 20:08:44 +0100 Subject: [PATCH 001/121] perf hacks --- src/tracker/Tracker.jl | 11 +++++++++-- src/tracker/array.jl | 12 ++++++------ src/tracker/back.jl | 10 ++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/tracker/Tracker.jl b/src/tracker/Tracker.jl index 65b8db11..21f3a43b 100644 --- a/src/tracker/Tracker.jl +++ b/src/tracker/Tracker.jl @@ -20,7 +20,7 @@ struct Call{F,As<:Tuple} args::As end -Call(f, args) = Call{typeof(f),typeof(args)}(f, args) +Call(f::F, args::T) where {F,T} = Call{F,T}(f, args) Call() = Call(nothing, ()) # When deserialising, the object_id changes @@ -46,7 +46,14 @@ track(f::Call, x) = Tracked{typeof(x)}(f) function _forward end -function track(f, xs...; kw...) +function track(f::F, xs...) where F + y, back = _forward(f, xs...) + ts = map(tracker, xs) + c = Call(back, ts) + track(c, y) +end + +function track_kw(f::F, xs...; kw...) where F y, back = _forward(f, xs...; kw...) track(Call(back, tracker.(xs)), y) end diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 6c7f93e3..90c7f1ec 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -101,7 +101,7 @@ Base.repmat(x::TrackedVecOrMat, a::Int64...) = track(repmat, x, a...) end end -Base.repeat(A::TrackedArray; kw...) = track(repeat, A; kw...) +Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) @grad function repeat(xs; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A))) repeat(data(xs), inner = inner, outer = outer), function (Δ) @@ -324,9 +324,9 @@ logsoftmax(xs::TrackedArray) = track(logsoftmax, xs) @grad logsoftmax(xs) = logsoftmax(data(xs)), Δ -> (nobacksies(:logsoftmax, ∇logsoftmax(data(Δ), data(xs))),) -conv(x::TrackedArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) -conv(x::AbstractArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) -conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...) +conv(x::TrackedArray, w::TrackedArray; kw...) = track_kw(conv, x, w; kw...) +conv(x::AbstractArray, w::TrackedArray; kw...) = track_kw(conv, x, w; kw...) +conv(x::TrackedArray, w::AbstractArray; kw...) = track_kw(conv, x, w; kw...) @grad conv(x, w; kw...) = conv(data(x), data(w); kw...), @@ -334,14 +334,14 @@ conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...) (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) -maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...) +maxpool(x::TrackedArray, k; kw...) = track_kw(maxpool, x, k; kw...) @grad function maxpool(x, k; kw...) y = maxpool(data(x), k; kw...) y, Δ -> (nobacksies(:maxpool, NNlib.∇maxpool(data.((Δ, y, x))..., k; kw...)), nothing) end -meanpool(x::TrackedArray, k; kw...) = track(meanpool, x, k; kw...) +meanpool(x::TrackedArray, k; kw...) = track_kw(meanpool, x, k; kw...) @grad function meanpool(x, k; kw...) y = meanpool(data(x), k; kw...) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 08cf9d6a..3264b348 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -152,3 +152,13 @@ function gradient(f, args...) end derivative(f, x) = gradient(f, x)[1] + +# Non-nesting versions + +function gradient_(f, xs...) + xs = param.(xs) + l = f(xs...) 
+ losscheck(l) + back!(l) + grad.(xs) +end From 5b37319289dbd0b439d6b53fbbaebba77448b87b Mon Sep 17 00:00:00 2001 From: Yueh-Hua Tu Date: Wed, 1 Aug 2018 00:10:53 +0800 Subject: [PATCH 002/121] Add Maxpool and Meanpool --- docs/src/models/layers.md | 2 ++ src/layers/conv.jl | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index c2056bb4..070f6737 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -6,6 +6,8 @@ These core layers form the foundation of almost all neural networks. Chain Dense Conv +Maxpool +Meanpool ``` ## Recurrent Layers diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 38310aad..f074e77f 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -50,3 +50,45 @@ function Base.show(io::IO, l::Conv) l.σ == identity || print(io, ", ", l.σ) print(io, ")") end + + +""" + Maxpool(k) + +Maxpooling layer. `k` stands for the size of the window for each dimension of the input. + +Takes the keyword arguments `pad` and `stride`. +""" +struct Maxpool{N} + k::NTuple{N,Int} + pad::NTuple{N,Int} + stride::NTuple{N,Int} + Maxpool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) +end + +(m::Maxpool)(x) = maxpool(x, m.k; pad = m.pad, stride = m.stride) + +function Base.show(io::IO, m::Maxpool) + print(io, "Maxpool(", m.k, ", ", m.pad, ", ", m.stride, ")") +end + + +""" + Meanpool(k) + +Meanpooling layer. `k` stands for the size of the window for each dimension of the input. + +Takes the keyword arguments `pad` and `stride`. +""" +struct Meanpool{N} + k::NTuple{N,Int} + pad::NTuple{N,Int} + stride::NTuple{N,Int} + Meanpool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) +end + +(m::Meanpool)(x) = meanpool(x, m.k; pad = m.pad, stride = m.stride) + +function Base.show(io::IO, m::Meanpool) + print(io, "Meanpool(", m.k, ", ", m.pad, ", ", m.stride, ")") +end From 1fd49c2a90f369c62a4f90327b2dcf5e2ad27ddf Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 20 Jun 2018 15:16:45 +0100 Subject: [PATCH 003/121] fix array show --- src/tracker/array.jl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 90c7f1ec..072ee490 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -28,17 +28,13 @@ Base.eltype(x::Type{<:TrackedArray{T}}) where T <: Real = TrackedReal{T} Base.show(io::IO, ::Type{TrackedArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = print(io, "TrackedArray{…,$A}") -function Base.showarray(io::IO, X::TrackedArray, repr::Bool = true; header = true) - if repr - print(io, "param(") - Base.showarray(io, data(X), true) - print(io, ")") - else - header && print(io, "Tracked ") - Base.showarray(io, data(X), false, header = header) - end +function Base.summary(io::IO, x::TrackedArray) + print(io, "Tracked ") + summary(io, data(x)) end +Base.print_array(io::IO, x::TrackedArray) = Base.print_array(io, data(x)) + Base.setindex!(xs::TrackedArray, v, i...) 
= error("Can't differentiate `setindex!`") From a49e2eae418abc80b9c3fe763593bbab3922067a Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Tue, 12 Jun 2018 18:09:18 +0100 Subject: [PATCH 004/121] deprecated Void --- docs/src/internals/tracker.md | 2 +- src/cuda/cudnn.jl | 70 +++++++++++++++++------------------ src/tracker/Tracker.jl | 4 +- src/tracker/back.jl | 8 ++-- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/src/internals/tracker.md b/docs/src/internals/tracker.md index 2c134f12..3d39451d 100644 --- a/docs/src/internals/tracker.md +++ b/docs/src/internals/tracker.md @@ -134,7 +134,7 @@ All `Tracked*` objects (`TrackedArray`, `TrackedReal`) are light wrappers around ```julia julia> x.tracker -Flux.Tracker.Tracked{Array{Float64,1}}(0x00000000, Flux.Tracker.Call{Void,Tuple{}}(nothing, ()), true, [5.0, 6.0], [-2.0, -2.0]) +Flux.Tracker.Tracked{Array{Float64,1}}(0x00000000, Flux.Tracker.Call{Nothing,Tuple{}}(nothing, ()), true, [5.0, 6.0], [-2.0, -2.0]) ``` The `Tracker` stores the gradient of a given object, which we've seen before. diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index 85b5b975..fe1b76d2 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -2,23 +2,23 @@ using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle, cudnnDataType, TensorDesc, FilterDesc mutable struct DropoutDesc - ptr::Ptr{Void} + ptr::Ptr{Nothing} states::CuVector{UInt8} end -Base.unsafe_convert(::Type{Ptr{Void}}, dd::DropoutDesc) = dd.ptr +Base.unsafe_convert(::Type{Ptr{Nothing}}, dd::DropoutDesc) = dd.ptr function DropoutDesc(ρ::Real; seed::Integer=0) d = [C_NULL] s = Csize_t[0] - @check ccall((:cudnnCreateDropoutDescriptor,libcudnn), cudnnStatus_t, (Ptr{Ptr{Void}},), d) - @check ccall((:cudnnDropoutGetStatesSize,libcudnn),cudnnStatus_t,(Ptr{Void},Ptr{Csize_t}),libcudnn_handle[],s) + @check ccall((:cudnnCreateDropoutDescriptor,libcudnn), cudnnStatus_t, (Ptr{Ptr{Nothing}},), d) + @check ccall((:cudnnDropoutGetStatesSize,libcudnn),cudnnStatus_t,(Ptr{Nothing},Ptr{Csize_t}),libcudnn_handle[],s) states = CuArray{UInt8}(s[]) # TODO: can we drop this when ρ=0? 
desc = DropoutDesc(d[], states) - @check ccall((:cudnnSetDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Void},Ptr{Void},Cfloat,Ptr{Void},Csize_t,Culonglong), + @check ccall((:cudnnSetDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},Ptr{Nothing},Cfloat,Ptr{Nothing},Csize_t,Culonglong), desc,libcudnn_handle[],ρ,states,length(states),seed) finalizer(desc, x -> - @check ccall((:cudnnDestroyDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Void},),x)) + @check ccall((:cudnnDestroyDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x)) return desc end @@ -57,14 +57,14 @@ mutable struct RNNDesc{T} params::CuVector{T} weights::NTuple{2,CuMatrix{T}} bias::CuVector{T} - ptr::Ptr{Void} + ptr::Ptr{Nothing} end -Base.unsafe_convert(::Type{Ptr{Void}}, d::RNNDesc) = d.ptr +Base.unsafe_convert(::Type{Ptr{Nothing}}, d::RNNDesc) = d.ptr function rnnParamSize(T, r, input) size = Csize_t[0] - @check ccall((:cudnnGetRNNParamsSize, libcudnn), cudnnStatus_t, (Ptr{Void},Ptr{Void},Ptr{Void},Ptr{Csize_t},Cint), + @check ccall((:cudnnGetRNNParamsSize, libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Ptr{Nothing},Ptr{Csize_t},Cint), libcudnn_handle[], r, TensorDesc(T, (1,input,1)), size, cudnnDataType(T)) return Int(size[])÷sizeof(T) end @@ -74,26 +74,26 @@ ngates(r::RNNDesc) = ngates(r.mode) function RNNDesc{T}(mode::Int, input::Int, hidden::Int; layers = 1) where T d = [C_NULL] - @check ccall((:cudnnCreateRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Ptr{Void}},),d) + @check ccall((:cudnnCreateRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Ptr{Nothing}},),d) dropoutDesc = DropoutDesc(0) inputMode = LINEAR_INPUT direction = UNIDIRECTIONAL algo = RNN_ALGO_STANDARD - @check ccall((:cudnnSetRNNDescriptor_v6,libcudnn), cudnnStatus_t, (Ptr{Void},Ptr{Void},Cint,Cint,Ptr{Void},Cint,Cint,Cint,Cint,Cint), + @check ccall((:cudnnSetRNNDescriptor_v6,libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Cint,Cint,Ptr{Nothing},Cint,Cint,Cint,Cint,Cint), libcudnn_handle[],d[],hidden,layers,dropoutDesc,inputMode,direction,mode,algo,cudnnDataType(T)) w = cuzeros(T, rnnParamSize(T, d[], input)) # TODO: avoid reserve allocation here rd = RNNDesc{T}(mode, input, hidden, w, params(w, input, hidden, ngates(mode))..., d[]) finalizer(rd, x -> - @check ccall((:cudnnDestroyRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Void},),x)) + @check ccall((:cudnnDestroyRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x)) return rd end function rnnWorkspaceSize(r::RNNDesc, seqlen, xdesc) size = Csize_t[0] - @check ccall((:cudnnGetRNNWorkspaceSize, libcudnn), cudnnStatus_t, (Ptr{Void},Ptr{Void},Cint,Ptr{Ptr{Void}},Ptr{Csize_t}), + @check ccall((:cudnnGetRNNWorkspaceSize, libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Cint,Ptr{Ptr{Nothing}},Ptr{Csize_t}), libcudnn_handle[], r, seqlen, xdesc, size) return Int(size[]) end @@ -110,7 +110,7 @@ getworkspace(r::RNNDesc, seqlen, xdesc) = function rnnTrainingReserveSize(r::RNNDesc, seqlen, xdesc) size = Csize_t[0] - @check ccall((:cudnnGetRNNTrainingReserveSize,libcudnn), cudnnStatus_t, (Ptr{Void}, Ptr{Void}, Cint, Ptr{Ptr{Void}}, Ptr{Csize_t}), + @check ccall((:cudnnGetRNNTrainingReserveSize,libcudnn), cudnnStatus_t, (Ptr{Nothing}, Ptr{Nothing}, Cint, Ptr{Ptr{Nothing}}, Ptr{Csize_t}), libcudnn_handle[], r, seqlen, xdesc, size) return Int(size[]) end @@ -119,19 +119,19 @@ function cudnnRNNForward(rnn::RNNDesc{T}, seqlen, xd, x, hd, h, cd, c, wd, w, yd workspace, reserve=nothing) where T if reserve == nothing @check ccall((:cudnnRNNForwardInference, libcudnn), cudnnStatus_t, - (Ptr{Void}, Ptr{Void}, 
Cint, - Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, - Ptr{Void}, Ptr{T}, Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, - Ptr{Void}, Ptr{T}, - Ptr{Void}, Csize_t), + (Ptr{Nothing}, Ptr{Nothing}, Cint, + Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Ptr{T}, Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Csize_t), libcudnn_handle[], rnn, seqlen, xd, x, hd, h, cd, c, wd, w, yd, y, hod, ho, cod, co, workspace, length(workspace)) else @check ccall((:cudnnRNNForwardTraining, libcudnn), cudnnStatus_t, - (Ptr{Void}, Ptr{Void}, Cint, - Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, - Ptr{Void}, Csize_t, Ptr{Void}, Csize_t), + (Ptr{Nothing}, Ptr{Nothing}, Cint, + Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Csize_t, Ptr{Nothing}, Csize_t), libcudnn_handle[], rnn, seqlen, xd, x, hd, h, cd, c, wd, w, yd, y, hod, ho, cod, co, workspace, length(workspace), reserve, length(reserve)) @@ -140,7 +140,7 @@ end xDesc(x) = [TensorDesc(eltype(x), (1, size(x, 1), size(x, 2)))] -hDesc(h::Void) = C_NULL, C_NULL +hDesc(h::Nothing) = C_NULL, C_NULL hDesc(x::Integer) = (@assert x == 0; hDesc(nothing)) function hDesc(h::CuArray) TensorDesc(eltype(h), (size(h, 1), size(h, 2), 1)), h @@ -187,11 +187,11 @@ forwardTrain(rnn::RNNDesc{T}, x::CuArray{T}, h::CuArray{T}, c = nothing) where T function cudnnRNNBackwardData(rnn::RNNDesc{T}, seqlen, yd, y, dyd, dy, dhod, dho, dcod, dco, wd, w, hd, h, cd, c, dxd, dx, dhd, dh, dcd, dc, ws, rs) where T @check ccall((:cudnnRNNBackwardData,libcudnn),cudnnStatus_t, - (Ptr{Void}, Ptr{Void}, Cint, - Ptr{Ptr{Void}}, Ptr{T}, Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, - Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, - Ptr{T}, Ptr{Ptr{Void}}, Ptr{T}, Ptr{Void}, Ptr{T}, Ptr{Void}, Ptr{T}, - Ptr{Void}, Csize_t, Ptr{Void}, Csize_t), + (Ptr{Nothing}, Ptr{Nothing}, Cint, + Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, + Ptr{T}, Ptr{Ptr{Nothing}}, Ptr{T}, Ptr{Nothing}, Ptr{T}, Ptr{Nothing}, Ptr{T}, + Ptr{Nothing}, Csize_t, Ptr{Nothing}, Csize_t), libcudnn_handle[], rnn, seqlen, yd, y, dyd, dy, dhod, dho, dcod, dco, wd, w, hd, h, cd, c, dxd, dx, dhd, dh, dcd, dc, ws, length(ws), rs, length(rs)) end @@ -217,13 +217,13 @@ backwardData(rnn, y, dy, dho, hx, reserve) = function cudnnRNNBackwardWeights(rnn::RNNDesc{T}, seqlen, xd, x, hd, h, yd, y, dwd, dw, workspace, reserve) where T @check ccall((:cudnnRNNBackwardWeights,libcudnn), cudnnStatus_t, - (Ptr{Void}, Ptr{Void}, Cint, # handle, rnnDesc, seqLength - Ptr{Ptr{Void}}, Ptr{T}, #x - Ptr{Void}, Ptr{T}, #hx - Ptr{Ptr{Void}}, Ptr{T}, #y - Ptr{Void}, Csize_t, #ws - Ptr{Void}, Ptr{T}, #dw - Ptr{Void}, Csize_t), #rs + (Ptr{Nothing}, Ptr{Nothing}, Cint, # handle, rnnDesc, seqLength + Ptr{Ptr{Nothing}}, Ptr{T}, #x + Ptr{Nothing}, Ptr{T}, #hx + Ptr{Ptr{Nothing}}, Ptr{T}, #y + Ptr{Nothing}, Csize_t, #ws + Ptr{Nothing}, Ptr{T}, #dw + Ptr{Nothing}, Csize_t), #rs libcudnn_handle[], rnn, seqlen, xd, x, hd, h, yd, y, workspace, length(workspace), dwd, dw, reserve, length(reserve)) end diff --git a/src/tracker/Tracker.jl b/src/tracker/Tracker.jl index 21f3a43b..63146f5f 100644 --- a/src/tracker/Tracker.jl +++ 
b/src/tracker/Tracker.jl @@ -12,7 +12,7 @@ tracker(x) = nothing istracked(x) = tracker(x) ≠ nothing isleaf(x) = !istracked(x) || isleaf(tracker(x)) grad(x) = grad(tracker(x)) -grad(::Void) = nothing +grad(::Nothing) = nothing data(x) = x struct Call{F,As<:Tuple} @@ -35,7 +35,7 @@ mutable struct Tracked{T} grad::T Tracked{T}(f::Call) where T = new(0, f, false) Tracked{T}(f::Call, grad::T) where T = new(0, f, false, grad) - Tracked{T}(f::Call{Void}, grad::T) where T = new(0, f, true, grad) + Tracked{T}(f::Call{Nothing}, grad::T) where T = new(0, f, true, grad) end istracked(x::Tracked) = true diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 3264b348..06f62e5d 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -26,7 +26,7 @@ function back_(c::Call, Δ) foreach(back, c.args, data.(Δs)) end -back_(::Call{Void}, Δ) = nothing +back_(::Call{Nothing}, Δ) = nothing accum!(x, Δ) = x .+ Δ accum!(x::AbstractArray, Δ) = (x .+= Δ) @@ -47,7 +47,7 @@ function back(x::Tracked, Δ) return end -back(::Void, _) = return +back(::Nothing, _) = return # Interface methods @@ -105,7 +105,7 @@ function back_(g::Grads, c::Call, Δ) foreach((x, Δ) -> back(g, x, Δ), c.args, Δs) end -back_(g::Grads, ::Call{Void}, Δ) = nothing +back_(g::Grads, ::Call{Nothing}, Δ) = nothing function back(g::Grads, x::Tracked, Δ) x.isleaf && (accum!(g, x, Δ); return) @@ -119,7 +119,7 @@ function back(g::Grads, x::Tracked, Δ) return end -back(::Grads, ::Void, _) = return +back(::Grads, ::Nothing, _) = return function forward(f, ps::Params) y = f() From b18b51656cb0341a47eb79e382c96ec05b6ef03e Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 3 Aug 2018 12:54:24 +0100 Subject: [PATCH 005/121] requires update --- src/Flux.jl | 2 +- src/onehot.jl | 2 +- src/treelike.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 0d78024b..dac36973 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -37,6 +37,6 @@ include("layers/normalise.jl") include("data/Data.jl") -@require CuArrays include("cuda/cuda.jl") +@init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("cuda/cuda.jl") end # module diff --git a/src/onehot.jl b/src/onehot.jl index 07206dfe..f19fa6f8 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -32,7 +32,7 @@ import Adapt.adapt adapt(T, xs::OneHotMatrix) = OneHotMatrix(xs.height, adapt(T, xs.data)) -@require CuArrays begin +@init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" begin import CuArrays: CuArray, cudaconvert Base.Broadcast._containertype(::Type{<:OneHotMatrix{<:CuArray}}) = CuArray cudaconvert(x::OneHotMatrix{<:CuArray}) = OneHotMatrix(x.height, cudaconvert(x.data)) diff --git a/src/treelike.jl b/src/treelike.jl index 13e562e6..e65ac41a 100644 --- a/src/treelike.jl +++ b/src/treelike.jl @@ -53,7 +53,7 @@ cpu(m) = mapleaves(x -> adapt(Array, x), m) gpu_adaptor = identity -@require CuArrays begin +@init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" begin global gpu_adaptor = CuArrays.cu end From 88a265154c85a9b03ee82a915bf22320d84585d3 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 20 Jun 2018 15:18:07 +0100 Subject: [PATCH 006/121] deprecations --- REQUIRE | 2 +- src/layers/basic.jl | 2 +- src/layers/normalise.jl | 4 ++-- src/layers/recurrent.jl | 2 +- src/tracker/array.jl | 34 ++++++++++------------------------ src/utils.jl | 2 +- test/tracker.jl | 4 ---- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/REQUIRE b/REQUIRE index 95fda02c..8b235c42 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 
0.6.0 +julia 0.7- Juno MacroTools 0.3.3 NNlib diff --git a/src/layers/basic.jl b/src/layers/basic.jl index cf89df41..2da9be11 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -16,7 +16,7 @@ m(x) == m[2](m[1](x)) `Chain` also supports indexing and slicing, e.g. `m[2]` or `m[1:end-1]`. `m[1:3](x)` will calculate the output of the first three layers. """ -type Chain +struct Chain layers::Vector{Any} Chain(xs...) = new([xs...]) end diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 54f5eb56..2b212d9a 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -135,8 +135,8 @@ function (BN::BatchNorm)(x) # update moving mean/std mtm = data(convert(T, BN.momentum)) - BN.μ = (1 - mtm) .* BN.μ .+ mtm .* squeeze(data(μ), (axes...)) - BN.σ = (1 - mtm) .* BN.σ .+ mtm .* squeeze(data(σ), (axes...)) .* m ./ (m - 1) + BN.μ = (1 - mtm) .* BN.μ .+ mtm .* squeeze(data(μ), (axes...,)) + BN.σ = (1 - mtm) .* BN.σ .+ mtm .* squeeze(data(σ), (axes...,)) .* m ./ (m - 1) end let λ = BN.λ diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 4707fc92..3fc40560 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -178,7 +178,7 @@ function (m::GRUCell)(h, x) r = σ.(gate(gx, o, 1) .+ gate(gh, o, 1) .+ gate(b, o, 1)) z = σ.(gate(gx, o, 2) .+ gate(gh, o, 2) .+ gate(b, o, 2)) h̃ = tanh.(gate(gx, o, 3) .+ r .* gate(gh, o, 3) .+ gate(b, o, 3)) - h′ = (1.-z).*h̃ .+ z.*h + h′ = (1 .- z).*h̃ .+ z.*h return h′, h′ end diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 072ee490..57a66821 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,3 +1,5 @@ +using LinearAlgebra + struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} tracker::Tracked{A} data::A @@ -77,26 +79,9 @@ Base.:-(xs::TrackedArray) = track(-, xs) Base.transpose(xs::TrackedArray) = track(transpose, xs) Base.ctranspose(xs::TrackedArray) = track(ctranspose, xs) -@grad transpose(xs) = data(xs).', Δ -> (reshape(Δ.', size(xs)),) +@grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) @grad ctranspose(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) -Base.repmat(x::TrackedVecOrMat, a::Integer...) = track(repmat, x, a...) -Base.repmat(x::TrackedVecOrMat, a::Int64...) = track(repmat, x, a...) - -@grad function repmat(xs, m, n = 1) - repmat(data(xs), m, n), function (Δ) - Δ′ = similar(xs) - S = size(xs) - for (i,v) in enumerate(data(Δ)) - d1 = divrem(i-1, S[1]*m) - x = d1[2] % S[1]+1 - y = d1[1] % S[2]+1 - Δ′[x, y] += v - end - return (nobacksies(:repmat, Δ′), nothing, nothing) - end -end - Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) @grad function repeat(xs; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A))) @@ -115,7 +100,6 @@ Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) 
end end - for f in [:vcat, :hcat] @eval begin # This section is a bit of a hack since julia doesn't have a standardised @@ -241,9 +225,11 @@ Base.maximum(xs::TrackedArray, region) = track(maximum, xs, region) Base.minimum(xs::TrackedArray) = track(minimum, xs) Base.minimum(xs::TrackedArray, region) = track(minimum, xs, region) -LinAlg.dot(xs::TrackedVector, ys::TrackedVector) = track(dot, xs, ys) -LinAlg.dot(xs::AbstractVector, ys::TrackedVector) = track(dot, xs, ys) -LinAlg.dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) +import LinearAlgebra: dot + +dot(xs::TrackedVector, ys::TrackedVector) = track(dot, xs, ys) +dot(xs::AbstractVector, ys::TrackedVector) = track(dot, xs, ys) +dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) @grad dot(xs, ys) = dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs) @@ -253,7 +239,7 @@ Base.std(x::TrackedArray; mean = Base.mean(x)) = Base.std(x::TrackedArray, dim; mean = Base.mean(x, dim)) = sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1)) -Base.vecnorm(x::TrackedArray, p::Real = 2) = +LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 @grad mean(xs) = mean(data(xs)), Δ -> (Δ / length(xs),) @@ -278,7 +264,7 @@ end # BLAS -Base.diagm(x::TrackedVector) = track(diagm, x) +LinearAlgebra.diagm(x::TrackedVector) = track(diagm, x) @grad diagm(x) = diagm(data(x)), Δ -> (diag(Δ),) for f in :[*, Ac_mul_B, A_mul_Bc, A_mul_Bt, At_mul_B].args diff --git a/src/utils.jl b/src/utils.jl index 5b4f1f17..fbb05934 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -119,7 +119,7 @@ function throttle(f, timeout; leading=true, trailing=false) end cooldown = false - @schedule try + @async try while (sleep(timeout); later != nothing) later() later = nothing diff --git a/test/tracker.jl b/test/tracker.jl index 40229e18..1fa89032 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -111,10 +111,6 @@ end @test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6)) -# TODO unreliable -@test gradtest(x -> repmat(x, 5,5), rand(4,5)) -@test gradtest(x -> repmat(x, 5), rand(4,5)) - @test gradtest(x -> repeat(x; inner=2, outer=3), rand(5)) @test gradtest(x -> repeat(x; inner=(2,2,1), outer=(1,1,3)), rand(5,4,3)) From 7057ca739eb641ae5ada65f442bf205f3af8358e Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 20 Jun 2018 15:18:07 +0100 Subject: [PATCH 007/121] fix std usage --- src/Flux.jl | 2 +- src/tracker/array.jl | 6 ++++-- test/tracker.jl | 1 + test/utils.jl | 1 + 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index dac36973..8fc35294 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -4,7 +4,7 @@ module Flux # Zero Flux Given -using Juno, Requires, Reexport +using Juno, Requires, Reexport, StatsBase using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 57a66821..de950b99 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -233,10 +233,12 @@ dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) @grad dot(xs, ys) = dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs) +using StatsBase + # Hacks to get std working -Base.std(x::TrackedArray; mean = Base.mean(x)) = +StatsBase.std(x::TrackedArray; mean = Base.mean(x)) = sqrt.(sum((x .- mean).^2) ./ (length(x)-1)) -Base.std(x::TrackedArray, dim; mean = Base.mean(x, dim)) = +StatsBase.std(x::TrackedArray, dim; mean = Base.mean(x, dim)) = sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1)) 
LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = diff --git a/test/tracker.jl b/test/tracker.jl index 1fa89032..40443d6d 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -1,6 +1,7 @@ using Flux.Tracker, Base.Test, NNlib using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv +using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) gradtest(f, dims...) = gradtest(f, rand.(dims)...) diff --git a/test/utils.jl b/test/utils.jl index 7ff21db1..bf3dd30a 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,4 +1,5 @@ using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian +using StatsBase: std @testset "Throttle" begin @testset "default behaviour" begin From fb8a2206593dc8b46c33ac6b3723bb1ac395c0f4 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 20 Jun 2018 14:11:31 +0100 Subject: [PATCH 008/121] fix matmul --- src/tracker/array.jl | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index de950b99..d5c04b5c 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,3 +1,5 @@ +import Base: *, == + using LinearAlgebra struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} @@ -56,9 +58,9 @@ Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...) = Base.similar(x::TrackedArray, T::Type) = similar(data(x), T) -Base.:(==)(x::TrackedArray, y) = data(x) == y -Base.:(==)(y, x::TrackedArray) = y == data(x) -Base.:(==)(x::TrackedArray, y::TrackedArray) = data(x) == data(y) +x::TrackedArray == y = data(x) == y +y == x::TrackedArray = y == data(x) +x::TrackedArray == y::TrackedArray = data(x) == data(y) # Array Stdlib @@ -77,10 +79,10 @@ Base.:-(xs::TrackedArray) = track(-, xs) @grad -(xs) = -data(xs), Δ -> (-Δ,) Base.transpose(xs::TrackedArray) = track(transpose, xs) -Base.ctranspose(xs::TrackedArray) = track(ctranspose, xs) +Base.adjoint(xs::TrackedArray) = track(adjoint, xs) @grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) -@grad ctranspose(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) +@grad adjoint(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) 
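# Aside: a minimal, hedged sketch of the `track`/`@grad` pattern these array.jl hunks rely on
# to register a differentiable operation with Tracker. `myclamp` is a made-up example name and
# the exact imports may differ between Flux versions; this is an illustration, not patch content.

using Flux
using Flux.Tracker: track, @grad, data, back!, grad, TrackedArray

# Forward overload: route tracked inputs through `track`, which records the call so
# gradients can flow back through it later.
myclamp(x::TrackedArray, lo::Real, hi::Real) = track(myclamp, x, lo, hi)

# Backward rule: return the primal value plus a pullback giving one gradient per argument
# (`nothing` for the non-differentiable bounds).
@grad function myclamp(x, lo, hi)
  y = clamp.(data(x), lo, hi)
  y, Δ -> (Δ .* ((data(x) .>= lo) .& (data(x) .<= hi)), nothing, nothing)
end

x = param(randn(3))
back!(sum(myclamp(x, -0.5, 0.5)))
grad(x)   # zero wherever x fell outside [-0.5, 0.5], the upstream gradient elsewhere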
@@ -269,31 +271,20 @@ end LinearAlgebra.diagm(x::TrackedVector) = track(diagm, x) @grad diagm(x) = diagm(data(x)), Δ -> (diag(Δ),) -for f in :[*, Ac_mul_B, A_mul_Bc, A_mul_Bt, At_mul_B].args - @eval begin - import Base.$f - $f(a::TrackedMatrix, b::TrackedMatrix) = track($f, a, b) - $f(a::TrackedMatrix, b::AbstractMatrix) = track($f, a, b) - $f(a::AbstractMatrix, b::TrackedMatrix) = track($f, a, b) +x::TrackedMatrix * y::AbstractMatrix = track(*, x, y) +y::AbstractMatrix * x::TrackedMatrix = track(*, x, y) +x::TrackedMatrix * y::TrackedMatrix = track(*, x, y) - $f(a::TrackedMatrix, b::TrackedVector) = track($f, a, b) - $f(a::TrackedMatrix, b::AbstractVector) = track($f, a, b) - $f(a::AbstractMatrix, b::TrackedVector) = track($f, a, b) +x::TrackedMatrix * y::AbstractVector = track(*, x, y) +y::AbstractMatrix * x::TrackedVector = track(*, x, y) +x::TrackedMatrix * y::TrackedVector = track(*, x, y) - $f(a::TrackedVector, b::TrackedVector) = track($f, a, b) - $f(a::TrackedVector, b::AbstractVector) = track($f, a, b) - $f(a::AbstractVector, b::TrackedVector) = track($f, a, b) - end -end +x::TrackedVector * y::AbstractVector = track(*, x, y) +y::AbstractVector * x::TrackedVector = track(*, x, y) +x::TrackedVector * y::TrackedVector = track(*, x, y) @grad a::AbstractMatrix * b::AbstractVecOrMat = - data(a)*data(b), Δ -> (A_mul_Bt(Δ, b), At_mul_B(a, Δ)) - -@grad Ac_mul_B(a, b) = Ac_mul_B(data(a), data(b)), Δ -> (A_mul_Bt(Δ, b)', a*Δ) -@grad A_mul_Bc(a, b) = A_mul_Bc(data(a), data(b)), Δ -> (Δ * b, At_mul_B(a, Δ)') - -@grad At_mul_B(a, b) = At_mul_B(data(a), data(b)), Δ -> (A_mul_Bt(Δ, b)', a*Δ) -@grad A_mul_Bt(a, b) = A_mul_Bt(data(a), data(b)), Δ -> (Δ * b, At_mul_B(a, Δ)') + data(a)*data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ) # NNlib From e486c5061088197b2964e09b7f144912cf8bb693 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 20 Jun 2018 14:44:16 +0100 Subject: [PATCH 009/121] fix data --- src/data/cmudict.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl index 2a26b691..b3257734 100644 --- a/src/data/cmudict.jl +++ b/src/data/cmudict.jl @@ -40,7 +40,7 @@ function rawdict() filter(!isempty, split.(split(readstring(deps("cmudict", "cmudict")), "\n")))) end -validword(s) = ismatch(r"^[\w\-\.]+$", s) +validword(s) = isascii(s) && ismatch(r"^[\w\-\.]+$", s) cmudict() = filter((s, ps) -> validword(s), rawdict()) From adc216f18237d6524ff09fa625cdb8a3a09fdb25 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 19:28:30 +0100 Subject: [PATCH 010/121] fix broadcasting --- src/tracker/array.jl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index d5c04b5c..1a5c6c1a 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -327,6 +327,9 @@ end using ForwardDiff: Dual, partials, value +_size(x::AbstractArray) = size(x) +_size(x) = () + dualify(xs, n) = xs dualify(xs::AbstractArray, ps) = map(x -> Dual(x, ps), xs) dualify(xs::Real, ps) = Dual(xs, ps) @@ -343,7 +346,7 @@ function getpartial(Δ, x, i) end function ∇broadcast(f, args::Vararg{Any,N}) where N - sizes = size.(args) + sizes = _size.(args) dargs = map((x,i) -> dualify(data(x), ntuple(j -> i==j, Val{N})), args, ntuple(identity, Val{N})) out = broadcast(f, dargs...) 
eltype(out) <: Dual || return out @@ -358,14 +361,14 @@ function ∇broadcast(f, args::Vararg{Any,N}) where N track(Call(back, tracker.(args)), y) end -Base.Broadcast._containertype(::Type{<:TrackedReal}) = TrackedArray -Base.Broadcast._containertype(::Type{<:TrackedArray}) = TrackedArray -Base.Broadcast.promote_containertype(::Type{TrackedArray}, ::Type{TrackedArray}) = TrackedArray -Base.Broadcast.promote_containertype(::Type{Array}, ::Type{TrackedArray}) = TrackedArray -Base.Broadcast.promote_containertype(::Type{TrackedArray}, ::Type{Array}) = TrackedArray -Base.Broadcast.promote_containertype(::Type{TrackedArray}, ct) = TrackedArray -Base.Broadcast.promote_containertype(ct, ::Type{TrackedArray}) = TrackedArray -Base.Broadcast.broadcast_indices(::Type{TrackedArray}, A::Ref) = () -Base.Broadcast.broadcast_indices(::Type{TrackedArray}, A) = indices(A) +using Base.Broadcast: BroadcastStyle -Base.Broadcast.broadcast_c(f, ::Type{TrackedArray}, A, Bs...) = ∇broadcast(f, A, Bs...) +struct TrackedStyle <: BroadcastStyle end + +Broadcast.BroadcastStyle(::Type{<:Union{TrackedArray,TrackedReal}}) = TrackedStyle() +Broadcast.BroadcastStyle(::TrackedStyle, ::BroadcastStyle) = TrackedStyle() + +function Base.copy(bc::Broadcast.Broadcasted{TrackedStyle}) + bc = Broadcast.flatten(bc) + ∇broadcast(bc.f, bc.args...) +end From 00cfe24d66a0615ab4030c2646b8fa49017ef573 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:42:32 +0100 Subject: [PATCH 011/121] fix cat --- src/tracker/array.jl | 21 ++++++++++++--------- test/tracker.jl | 13 +++++++------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 1a5c6c1a..2e3564cc 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -103,6 +103,7 @@ Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) end for f in [:vcat, :hcat] + UArray = :(Union{TrackedArray,Vector,Matrix,Adjoint,Transpose}) @eval begin # This section is a bit of a hack since julia doesn't have a standardised # promotion mechanism for concatenation yet @@ -111,18 +112,18 @@ for f in [:vcat, :hcat] # It should support tracked concatenation with rank ∈ (1,2) with a # TrackedArray anywhere among the arguments This works as long as base has # other functions that captures `(::Union{Vector,RowVector,Matrix}...)`. - Base.$f(a::Union{TrackedArray,Vector,RowVector,Matrix}...) = track($f, a...) + Base.$f(a::$UArray...) = track($f, a...) # It should support tracked concatenation with rank>2 if the TrackedArray is # first Base.$f(a::TrackedArray, b::AbstractArray...) = track($f, a, b...) - Base.$f(a::TrackedArray, b::Union{TrackedArray,Vector,RowVector,Matrix}...) = track($f, a, b...) # resolves ambiguity introduced by previous row + Base.$f(a::TrackedArray, b::$UArray...) = track($f, a, b...) # resolves ambiguity introduced by previous row # It should support tracked concatenation with rank>2 if the TrackedArray is # second Base.$f(a::Array, b::TrackedArray, c::AbstractArray...) = track($f, a, b, c...) - Base.$f(a::Union{Vector,RowVector,Matrix}, b::TrackedArray, - c::Union{TrackedArray,Vector,RowVector,Matrix}...) = + Base.$f(a::Union{Vector,Matrix,Adjoint,Transpose}, b::TrackedArray, + c::$UArray...) = track($f, a, b, c...) # resolves ambiguity introduced by previous row end end @@ -157,11 +158,13 @@ end end end -Base.cat(dims, a::TrackedArray, b::AbstractArray...) = track(cat, dims, a, b...) -Base.cat(dims, a::Union{RowVector,Array}, b::TrackedArray, c::AbstractArray...) = track(cat, dims, a, b, c...) 
+Base.cat(a::TrackedArray; dims) = track_kw(cat, a, dims = dims) +Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) +Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) +Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) -@grad function cat(dims, Xs...) - cat(dims, data.(Xs)...), function (Δ) +@grad function cat(Xs...; dims) + cat(data.(Xs)..., dims = dims), function (Δ) start = ntuple(i -> 0, Val{ndims(Δ)}) Δs = [begin dim_xs = 1:ndims(xs) @@ -171,7 +174,7 @@ Base.cat(dims, a::Union{RowVector,Array}, b::TrackedArray, c::AbstractArray...) start = start .+ till_xs d end for xs in Xs] - return (nothing, Δs...,) + return (Δs...,) end end diff --git a/test/tracker.jl b/test/tracker.jl index 40443d6d..93897dc2 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -1,3 +1,4 @@ +using Flux using Flux.Tracker, Base.Test, NNlib using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv @@ -49,8 +50,8 @@ function promotiontest(f, A, B, C) end @testset "concat" begin - cat1(x...) = cat(1, x...) - cat2(x...) = cat(2, x...) + cat1(x...) = cat(x..., dims = 1) + cat2(x...) = cat(x..., dims = 2) @testset for vcatf in [vcat, cat1] @test gradtest(vcatf, rand(5), rand(3)) @@ -72,17 +73,17 @@ end @test gradtest(hcatf, rand(5), rand(5,2)) end - @testset for catf in [vcat, cat1, hcat, cat2, (x...) -> cat(3, x...), (x...) -> cat((1,2), x...)] + @testset for catf in [vcat, cat1, hcat, cat2, (x...) -> cat(x..., dims = 3), (x...) -> cat(x..., dims = (1,2))] @test gradtest(catf, rand(5)) @test gradtest(catf, rand(5)') @test gradtest(catf, rand(2,5)) @test gradtest(catf, rand(2,5,3)) end - @test gradtest((x...) -> cat(3, x...), rand(2,5,2), rand(2,5,3), rand(2,5,4)) + @test gradtest((x...) -> cat(x..., dims = 3), rand(2,5,2), rand(2,5,3), rand(2,5,4)) @testset "cat($dim, ...)" for dim in 3:5 - catdim = (x...) -> cat(dim, x...) + catdim = (x...) -> cat(x..., dims = dim) @test gradtest(catdim, rand(5), rand(5), rand(5)) @test gradtest(catdim, rand(2,5), rand(2,5), rand(2,5)) @test gradtest(catdim, rand(2,5,3), rand(2,5,3), rand(2,5,3)) @@ -92,7 +93,7 @@ end @test !isa(hcat(rand(2)), TrackedArray) @test !isa(cat(1,rand(2)), TrackedArray) - @test gradtest((a,b)->cat((2,3,5), a, b), rand(2,3), rand(2,4,2,1)) + @test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1)) @testset "promotiontest" begin @testset for fcat in [hcat, vcat, (x...) -> cat(3, x...), (x...) 
-> cat((1,2), x...)] From aa209ee137873a1fefff08d5148ffb20e57ba46f Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:48:07 +0100 Subject: [PATCH 012/121] no longer needed --- src/layers/basic.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 2da9be11..e9ae061b 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -38,9 +38,6 @@ function Base.show(io::IO, c::Chain) print(io, ")") end -# Seem to need this for `accumulate`; try removing on 0.7 -Base.rcum_promote_type(op, ::Type, ::Type{Any}) = Any - activations(c::Chain, x) = accumulate((x, m) -> m(x), x, c.layers) """ From e14641e4e283815fa9ee2c70426ba5d91db7a534 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:48:40 +0100 Subject: [PATCH 013/121] rm CuArrays tests for now --- test/runtests.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 47f7e9e5..1e46c926 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,8 +11,8 @@ include("layers/stateless.jl") include("optimise.jl") include("data.jl") -if Base.find_in_path("CuArrays") ≠ nothing - include("cuda/cuda.jl") -end +# if Base.find_in_path("CuArrays") ≠ nothing +# include("cuda/cuda.jl") +# end end From 297bb5f44e06d4d0e37565c7998bb9075a198436 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:53:34 +0100 Subject: [PATCH 014/121] update travis --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 18f92659..bef19a3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,11 @@ os: - linux # - osx julia: - - 0.6 + - 0.7 # uncomment the following lines to override the default test script -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)' +# script: +# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi +# - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)' after_success: - julia -e 'Pkg.add("Documenter")' - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' From 474f5785174c6baad1cd61a50b4bc922ba5a8a53 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:56:51 +0100 Subject: [PATCH 015/121] ObjectIdDict -> IdDict --- src/tracker/back.jl | 6 +++--- src/tracker/idset.jl | 4 ++-- src/treelike.jl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 06f62e5d..9ca0377c 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -79,14 +79,14 @@ function Base.show(io::IO, ps::Params) end struct Grads - grads::ObjectIdDict + grads::IdDict{Any,Any} end Base.show(io::IO, ps::Grads) = println(io, "Grads(...)") -Grads() = Grads(ObjectIdDict()) +Grads() = Grads(IdDict()) -Grads(ps::Params) = Grads(ObjectIdDict(tracker(p) => init_grad(data(p)) for p in ps)) +Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps)) Base.getindex(g::Grads, x::Tracked) = g.grads[x] function Base.getindex(g::Grads, x) diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index 68d1eea1..0d5fade9 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -1,6 +1,6 @@ struct IdSet{T} <: AbstractSet{T} - dict::ObjectIdDict - IdSet{T}() where T = new(ObjectIdDict()) + dict::IdDict{T,Nothing} + IdSet{T}() where T = new(IdDict{T,Nothing}()) end Base.eltype{T}(::IdSet{T}) = T diff --git a/src/treelike.jl 
b/src/treelike.jl index e65ac41a..e4c4e33f 100644 --- a/src/treelike.jl +++ b/src/treelike.jl @@ -16,7 +16,7 @@ end isleaf(x) = isempty(children(x)) -function mapleaves(f, x; cache = ObjectIdDict()) +function mapleaves(f, x; cache = IdDict()) haskey(cache, x) && return cache[x] cache[x] = isleaf(x) ? f(x) : mapchildren(x -> mapleaves(f, x, cache = cache), x) end From 89872c5a8b91e978018a4f744fb0146c19e5b44f Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 20:59:07 +0100 Subject: [PATCH 016/121] val deprecations --- src/tracker/array.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 2e3564cc..af5e6681 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -165,11 +165,11 @@ Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track_k @grad function cat(Xs...; dims) cat(data.(Xs)..., dims = dims), function (Δ) - start = ntuple(i -> 0, Val{ndims(Δ)}) + start = ntuple(i -> 0, Val(ndims(Δ))) Δs = [begin dim_xs = 1:ndims(xs) - till_xs = ntuple((i -> i in dims ? (i in dim_xs ? size(xs,i) : 1) : 0), Val{ndims(Δ)}) - xs_in_Δ = ntuple(i -> till_xs[i] > 0 ? (start[i]+1:start[i]+till_xs[i]) : Colon(), Val{ndims(Δ)}) + till_xs = ntuple((i -> i in dims ? (i in dim_xs ? size(xs,i) : 1) : 0), Val(ndims(Δ))) + xs_in_Δ = ntuple(i -> till_xs[i] > 0 ? (start[i]+1:start[i]+till_xs[i]) : Colon(), Val(ndims(Δ))) d = reshape(Δ[xs_in_Δ...],size(xs)) start = start .+ till_xs d @@ -350,13 +350,13 @@ end function ∇broadcast(f, args::Vararg{Any,N}) where N sizes = _size.(args) - dargs = map((x,i) -> dualify(data(x), ntuple(j -> i==j, Val{N})), args, ntuple(identity, Val{N})) + dargs = map((x,i) -> dualify(data(x), ntuple(j -> i==j, Val(N))), args, ntuple(identity, Val(N))) out = broadcast(f, dargs...) 
eltype(out) <: Dual || return out y = value.(out) back = function (Δ_) Δ = data(Δ_) - Δargs = ntuple(i -> getpartial.(Δ, out, i), Val{N}) + Δargs = ntuple(i -> getpartial.(Δ, out, i), Val(N)) dxs = map((x, Δ) -> unbroadcast(x, Δ), sizes, Δargs) nobacksies(:broadcast, dxs) end From 85fd77d70a221a6e3f953de7c08d03432368e582 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 21:08:53 +0100 Subject: [PATCH 017/121] linalg deprecations --- src/tracker/array.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index af5e6681..5b8ddd13 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,6 +1,7 @@ import Base: *, == -using LinearAlgebra +import LinearAlgebra +using LinearAlgebra: Transpose, Adjoint, diagm struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} tracker::Tracked{A} From d782b33701f5d624e89d11fd0ce6cc5daf54e242 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 21:11:24 +0100 Subject: [PATCH 018/121] syntax --- src/tracker/idset.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index 0d5fade9..1bbfec09 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -3,15 +3,15 @@ struct IdSet{T} <: AbstractSet{T} IdSet{T}() where T = new(IdDict{T,Nothing}()) end -Base.eltype{T}(::IdSet{T}) = T +Base.eltype(::IdSet{T}) where T = T IdSet() = IdSet{Any}() -Base.push!{T}(s::IdSet{T}, x::T) = (s.dict[x] = nothing; s) -Base.delete!{T}(s::IdSet{T}, x::T) = (delete!(s.dict, x); s) +Base.push!(s::IdSet{T}, x::T) where T = (s.dict[x] = nothing; s) +Base.delete!(s::IdSet{T}, x::T) where T = (delete!(s.dict, x); s) Base.in(x, s::IdSet) = haskey(s.dict, x) -(::Type{IdSet{T}}){T}(xs) = push!(IdSet{T}(), xs...) +(::Type{IdSet{T}})(xs) where T = push!(IdSet{T}(), xs...) IdSet(xs) = IdSet{eltype(xs)}(xs) From 70718e7a64041354e3fd72be248d4783ac8d39b7 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 12 Jul 2018 22:43:11 +0100 Subject: [PATCH 019/121] update treelike --- docs/src/models/basics.md | 2 +- src/Flux.jl | 2 +- src/layers/basic.jl | 4 ++-- src/layers/conv.jl | 2 +- src/layers/normalise.jl | 2 +- src/layers/recurrent.jl | 8 ++++---- src/treelike.jl | 15 +++++++++++++-- 7 files changed, 23 insertions(+), 12 deletions(-) diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index 04e8459d..da2a125b 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -211,7 +211,7 @@ m(5) # => 26 Flux provides a set of helpers for custom layers, which you can enable by calling ```julia -Flux.treelike(Affine) +Flux.@treelike Affine ``` This enables a useful extra set of functionality for our `Affine` layer, such as [collecting its parameters](../training/optimisers.md) or [moving it to the GPU](../gpu.md). 
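For orientation, a rough, hedged sketch of what `@treelike` buys you. The `Affine` definition is the example layer assumed from earlier in that docs page, and the exact helper behaviour may vary between Flux versions:

```julia
using Flux

# The Affine example layer assumed from the docs page above.
struct Affine
  W
  b
end

Affine(in::Integer, out::Integer) =
  Affine(param(randn(out, in)), param(randn(out)))

(m::Affine)(x) = m.W * x .+ m.b

# `@treelike` defines `children`/`mapchildren` for Affine, so tree-walking helpers work:
Flux.@treelike Affine

m = Affine(10, 5)
ps = params(m)     # now collects m.W and m.b
# m_gpu = gpu(m)   # mapleaves-based helpers such as gpu/cpu can traverse the layer too
```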
diff --git a/src/Flux.jl b/src/Flux.jl index 8fc35294..3b7f22b2 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -4,7 +4,7 @@ module Flux # Zero Flux Given -using Juno, Requires, Reexport, StatsBase +using MacroTools, Juno, Requires, Reexport, StatsBase using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, diff --git a/src/layers/basic.jl b/src/layers/basic.jl index e9ae061b..a73816c3 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -73,7 +73,7 @@ function Dense(in::Integer, out::Integer, σ = identity; return Dense(param(initW(out, in)), param(initb(out)), σ) end -treelike(Dense) +@treelike Dense function (a::Dense)(x) W, b, σ = a.W, a.b, a.σ @@ -104,7 +104,7 @@ end Diagonal(in::Integer; initα = ones, initβ = zeros) = Diagonal(param(initα(in)), param(initβ(in))) -treelike(Diagonal) +@treelike Diagonal function (a::Diagonal)(x) α, β = a.α, a.β diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 38310aad..3a6592ed 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -35,7 +35,7 @@ Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = Conv(param(init(k..., ch...)), param(zeros(ch[2])), σ, stride = stride, pad = pad, dilation = dilation) -Flux.treelike(Conv) +@treelike Conv function (c::Conv)(x) # TODO: breaks gpu broadcast :( diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 2b212d9a..8c3f3462 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -58,7 +58,7 @@ end LayerNorm(h::Integer) = LayerNorm(Diagonal(h)) -treelike(LayerNorm) +@treelike LayerNorm (a::LayerNorm)(x) = a.diag(normalise(x)) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 3fc40560..23a9fad1 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -38,7 +38,7 @@ function (m::Recur)(xs...) return y end -treelike(Recur, (:cell, :init)) +@treelike Recur cell, init Base.show(io::IO, m::Recur) = print(io, "Recur(", m.cell, ")") @@ -94,7 +94,7 @@ end hidden(m::RNNCell) = m.h -treelike(RNNCell) +@treelike RNNCell function Base.show(io::IO, l::RNNCell) print(io, "RNNCell(", size(l.Wi, 2), ", ", size(l.Wi, 1)) @@ -143,7 +143,7 @@ end hidden(m::LSTMCell) = (m.h, m.c) -treelike(LSTMCell) +@treelike LSTMCell Base.show(io::IO, l::LSTMCell) = print(io, "LSTMCell(", size(l.Wi, 2), ", ", size(l.Wi, 1)÷4, ")") @@ -184,7 +184,7 @@ end hidden(m::GRUCell) = m.h -treelike(GRUCell) +@treelike GRUCell Base.show(io::IO, l::GRUCell) = print(io, "GRUCell(", size(l.Wi, 2), ", ", size(l.Wi, 1)÷3, ")") diff --git a/src/treelike.jl b/src/treelike.jl index e4c4e33f..3d83d448 100644 --- a/src/treelike.jl +++ b/src/treelike.jl @@ -7,13 +7,24 @@ mapchildren(f, x) = x children(x::Tuple) = x mapchildren(f, x::Tuple) = map(f, x) -function treelike(T, fs = fieldnames(T)) - @eval current_module() begin +function treelike(m::Module, T, fs = fieldnames(T)) + @eval m begin Flux.children(x::$T) = ($([:(x.$f) for f in fs]...),) Flux.mapchildren(f, x::$T) = $T(f.($children(x))...) end end +function treelike(T, fs = fieldnames(T)) + Base.depwarn("`treelike(T)` is deprecated, use `@treelike T`", :treelike) + treelike(Base._current_module(), T, fs) +end + +macro treelike(T, fs = nothing) + fs == nothing || isexpr(fs, :tuple) || error("@treelike T (a, b)") + fs = fs == nothing ? 
[] : [:($(map(QuoteNode, fs.args)...),)] + :(treelike(@__MODULE__, $(esc(T)), $(fs...))) +end + isleaf(x) = isempty(children(x)) function mapleaves(f, x; cache = IdDict()) From a50432324b820c55023aabe85e76486b87e6d06c Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 30 Jul 2018 16:27:30 +0100 Subject: [PATCH 020/121] rm broken test --- test/tracker.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/tracker.jl b/test/tracker.jl index 93897dc2..fb64d15a 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -209,9 +209,6 @@ end @testset "Fallbacks" begin xs = param([1 2; 3 4]) @test similar(xs) isa Matrix{Float64} - # Remove this test if we do LowerTriangular properly - L = LowerTriangular(xs) - @test L*L' isa Matrix{TrackedReal{Float64}} end @test @sprintf("%.2f", sum(param([1,2,3]))) == "6.00" From 4cf6bac0c1454bf89318e7308fdb3291f9c4d3e1 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 30 Jul 2018 16:32:43 +0100 Subject: [PATCH 021/121] fix hook --- src/tracker/Tracker.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tracker/Tracker.jl b/src/tracker/Tracker.jl index 63146f5f..2d805af9 100644 --- a/src/tracker/Tracker.jl +++ b/src/tracker/Tracker.jl @@ -87,7 +87,7 @@ Hook into gradient backpropagation. `x` is unmodified, but when backpropagating the sign of the gradient applied to `x`. """ hook(f, x) = istracked(x) ? track(hook, f, x) : x -@grad hook(f, x) = x, Δ -> (nothing, f(Δ)) +@grad hook(f, x) = data(x), Δ -> (nothing, f(Δ)) """ checkpoint(f, args...) From f5c9361617887dea943bce7c5efacb29a1a230c7 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 30 Jul 2018 17:04:18 +0100 Subject: [PATCH 022/121] matmul fix --- src/tracker/array.jl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 5b8ddd13..7111d780 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -276,20 +276,29 @@ LinearAlgebra.diagm(x::TrackedVector) = track(diagm, x) @grad diagm(x) = diagm(data(x)), Δ -> (diag(Δ),) x::TrackedMatrix * y::AbstractMatrix = track(*, x, y) -y::AbstractMatrix * x::TrackedMatrix = track(*, x, y) +x::AbstractMatrix * y::TrackedMatrix = track(*, x, y) x::TrackedMatrix * y::TrackedMatrix = track(*, x, y) x::TrackedMatrix * y::AbstractVector = track(*, x, y) -y::AbstractMatrix * x::TrackedVector = track(*, x, y) +x::AbstractMatrix * y::TrackedVector = track(*, x, y) x::TrackedMatrix * y::TrackedVector = track(*, x, y) x::TrackedVector * y::AbstractVector = track(*, x, y) -y::AbstractVector * x::TrackedVector = track(*, x, y) +x::AbstractVector * y::TrackedVector = track(*, x, y) x::TrackedVector * y::TrackedVector = track(*, x, y) @grad a::AbstractMatrix * b::AbstractVecOrMat = data(a)*data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ) +# @grad function (a::AbstractMatrix * b::AbstractVecOrMat) +# # @show size(a) size(b) +# data(a)*data(b), function (Δ) +# @show size(Δ) size(b) size(Δ*transpose(b)) size(Δ*transpose(data(b))) +# @show typeof(Δ) typeof(b) +# (Δ * transpose(b), transpose(a) * Δ) +# end +# end + # NNlib using NNlib From e5b3d270167ae6c6f89ee7c3895483cd9e3549fb Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 3 Aug 2018 15:14:10 +0100 Subject: [PATCH 023/121] track_kw should be unnecessary --- src/tracker/Tracker.jl | 9 +-------- src/tracker/array.jl | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/tracker/Tracker.jl b/src/tracker/Tracker.jl index 2d805af9..2c4951a9 100644 --- a/src/tracker/Tracker.jl +++ 
b/src/tracker/Tracker.jl @@ -46,14 +46,7 @@ track(f::Call, x) = Tracked{typeof(x)}(f) function _forward end -function track(f::F, xs...) where F - y, back = _forward(f, xs...) - ts = map(tracker, xs) - c = Call(back, ts) - track(c, y) -end - -function track_kw(f::F, xs...; kw...) where F +function track(f::F, xs...; kw...) where F y, back = _forward(f, xs...; kw...) track(Call(back, tracker.(xs)), y) end diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 7111d780..13dfe393 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -85,7 +85,7 @@ Base.adjoint(xs::TrackedArray) = track(adjoint, xs) @grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) @grad adjoint(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) -Base.repeat(A::TrackedArray; kw...) = track_kw(repeat, A; kw...) +Base.repeat(A::TrackedArray; kw...) = track(repeat, A; kw...) @grad function repeat(xs; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A))) repeat(data(xs), inner = inner, outer = outer), function (Δ) @@ -159,10 +159,10 @@ end end end -Base.cat(a::TrackedArray; dims) = track_kw(cat, a, dims = dims) -Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) -Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) -Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track_kw(cat, a, b, c..., dims = dims) +Base.cat(a::TrackedArray; dims) = track(cat, a, dims = dims) +Base.cat(a::TrackedArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) +Base.cat(a::TrackedArray, b::AbstractArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) +Base.cat(a::AbstractArray, b::TrackedArray, c::AbstractArray...; dims) = track(cat, a, b, c..., dims = dims) @grad function cat(Xs...; dims) cat(data.(Xs)..., dims = dims), function (Δ) @@ -312,9 +312,9 @@ logsoftmax(xs::TrackedArray) = track(logsoftmax, xs) @grad logsoftmax(xs) = logsoftmax(data(xs)), Δ -> (nobacksies(:logsoftmax, ∇logsoftmax(data(Δ), data(xs))),) -conv(x::TrackedArray, w::TrackedArray; kw...) = track_kw(conv, x, w; kw...) -conv(x::AbstractArray, w::TrackedArray; kw...) = track_kw(conv, x, w; kw...) -conv(x::TrackedArray, w::AbstractArray; kw...) = track_kw(conv, x, w; kw...) +conv(x::TrackedArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) +conv(x::AbstractArray, w::TrackedArray; kw...) = track(conv, x, w; kw...) +conv(x::TrackedArray, w::AbstractArray; kw...) = track(conv, x, w; kw...) @grad conv(x, w; kw...) = conv(data(x), data(w); kw...), @@ -322,14 +322,14 @@ conv(x::TrackedArray, w::AbstractArray; kw...) = track_kw(conv, x, w; kw...) (NNlib.∇conv_data(data.((Δ, x, w))...; kw...), NNlib.∇conv_filter(data.((Δ, x, w))...; kw...))) -maxpool(x::TrackedArray, k; kw...) = track_kw(maxpool, x, k; kw...) +maxpool(x::TrackedArray, k; kw...) = track(maxpool, x, k; kw...) @grad function maxpool(x, k; kw...) y = maxpool(data(x), k; kw...) y, Δ -> (nobacksies(:maxpool, NNlib.∇maxpool(data.((Δ, y, x))..., k; kw...)), nothing) end -meanpool(x::TrackedArray, k; kw...) = track_kw(meanpool, x, k; kw...) +meanpool(x::TrackedArray, k; kw...) = track(meanpool, x, k; kw...) @grad function meanpool(x, k; kw...) y = meanpool(data(x), k; kw...) From e98538673a6232cdf56ed0cc0adee3360f13f841 Mon Sep 17 00:00:00 2001 From: pevnak Date: Tue, 17 Jul 2018 16:57:39 +0200 Subject: [PATCH 024/121] updated sum to be compliant with latest beta. 
Removed some depwarns --- src/layers/basic.jl | 4 ++-- src/tracker/array.jl | 23 ++++++++++++++--------- src/utils.jl | 4 ++-- test/tracker.jl | 17 ++++++++++------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index a73816c3..8f9da6ff 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -28,7 +28,7 @@ children(c::Chain) = c.layers mapchildren(f, c::Chain) = Chain(f.(c.layers)...) adapt(T, c::Chain) = Chain(map(x -> adapt(T, x), c.layers)...) -(c::Chain)(x) = foldl((x, m) -> m(x), x, c.layers) +(c::Chain)(x) = foldl((x, m) -> m(x), c.layers; init = x) Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...) @@ -101,7 +101,7 @@ struct Diagonal{T} β::T end -Diagonal(in::Integer; initα = ones, initβ = zeros) = +Diagonal(in::Integer; initα = ones, initβ = (x) -> similar(x) .= 0) = Diagonal(param(initα(in)), param(initβ(in))) @treelike Diagonal diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 13dfe393..e1e17382 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,6 +1,7 @@ import Base: *, == import LinearAlgebra +using Statistics using LinearAlgebra: Transpose, Adjoint, diagm struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} @@ -26,7 +27,7 @@ TrackedArray(c::Call, x::A) where A <: AbstractArray = TrackedArray(c::Call, x::A, Δ::A) where A <: AbstractArray = TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c, Δ), x, Δ) -TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, zeros(x)) +TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, similar(x) .= 0) Base.eltype(x::Type{<:TrackedArray{T}}) where T <: Real = TrackedReal{T} @@ -204,12 +205,16 @@ Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) # Reductions -Base.sum(xs::TrackedArray, dim) = track(sum, xs, dim) +Base.sum(xs::TrackedArray; dims) = track(sum, xs, dims) Base.sum(xs::TrackedArray) = track(sum, xs) Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs)) -@grad sum(xs, dim...) = sum(data(xs), dim...), - Δ -> (zero(xs) .+ Δ, map(_->nothing,dim)...) +@grad sum(xs, dims::Int) = sum(data(xs), dims = dims), + Δ -> (zero(xs) .+ Δ, nothing) +@grad sum(xs, dims) = sum(data(xs), dims = dims), + Δ -> (zero(xs) .+ Δ, map(_->nothing,dims)...) +@grad sum(xs) = sum(data(xs)), + Δ -> (zero(xs) .+ Δ,) Base.prod(xs::TrackedArray, dim) = track(prod, xs, dim) Base.prod(xs::TrackedArray) = track(prod, xs) @@ -223,8 +228,8 @@ Base.prod(f::Union{Function, Type}, xs::TrackedArray) = prod(f.(xs)) Base.findfirst(xs::TrackedArray, args...) = findfirst(xs.data, args...) 
-Base.mean(xs::TrackedArray) = track(mean, xs) -Base.mean(xs::TrackedArray, region) = track(mean, xs, region) +Statistics.mean(xs::TrackedArray) = track(mean, xs) +Statistics.mean(xs::TrackedArray, region) = track(mean, xs, region) Base.maximum(xs::TrackedArray) = track(maximum, xs) Base.maximum(xs::TrackedArray, region) = track(maximum, xs, region) @@ -242,9 +247,9 @@ dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) using StatsBase # Hacks to get std working -StatsBase.std(x::TrackedArray; mean = Base.mean(x)) = +StatsBase.std(x::TrackedArray; mean = Statistics.mean(x)) = sqrt.(sum((x .- mean).^2) ./ (length(x)-1)) -StatsBase.std(x::TrackedArray, dim; mean = Base.mean(x, dim)) = +StatsBase.std(x::TrackedArray, dim; mean = Statistics.mean(x, dim)) = sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1)) LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = @@ -349,7 +354,7 @@ dualify(xs::Real, ps) = Dual(xs, ps) unbroadcast(x::Tuple, Δ) = x == size(Δ) ? Δ : - reshape(sum(Δ, filter(n -> n > length(x) || x[n] == 1, 1:ndims(Δ))), x) + reshape(sum(Δ, dims = filter(n -> n > length(x) || x[n] == 1, 1:ndims(Δ))), x) unbroadcast(x::Tuple{}, Δ) = sum(Δ) diff --git a/src/utils.jl b/src/utils.jl index fbb05934..c187401d 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,8 +1,8 @@ # Arrays initn(dims...) = randn(dims...)/100 -glorot_uniform(dims...) = (rand(dims...) - 0.5)*sqrt(24.0/(sum(dims))) -glorot_normal(dims...) = (randn(dims...)*sqrt(2.0/sum(dims))) +glorot_uniform(dims...) = (rand(dims...) .- 0.5) .* sqrt(24.0/(sum(dims))) +glorot_normal(dims...) = randn(dims...) .* sqrt(2.0/sum(dims)) unsqueeze(xs, dim) = reshape(xs, (size(xs)[1:dim-1]..., 1, size(xs)[dim:end]...)) diff --git a/test/tracker.jl b/test/tracker.jl index fb64d15a..e27f4615 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -14,10 +14,13 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...) 
@test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2) -@test gradtest((w, x) -> w'*x, randn(10, 2), randn(10)) -@test gradtest((w, x) -> w*x', randn(5,5), randn(5,5)) +@test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10)) +@test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5)) -@test gradtest(x -> sum(x, (2, 3)), (3,4,5)) +@test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5)) +@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) +@test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) +@test gradtest(x -> sum(x), randn(Float64,2,3)) @test gradtest(x -> prod(x, (2, 3)), (3,4,5)) @test gradtest(x -> prod(x), (3,4,5)) @@ -167,9 +170,9 @@ end 2y + x end -@test gradtest(conv, rand(10, 3, 2), randn(2, 3, 2)) -@test gradtest(conv, rand(10, 10, 3, 2), randn(2, 2, 3, 2)) -@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(2, 2, 2, 3, 2)) +@test gradtest(conv, rand(10, 3, 2), randn(Float64,2, 3, 2)) +@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64,2, 2, 3, 2)) +@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64,2, 2, 2, 3, 2)) @test gradtest(x -> maxpool(x, (2,2)), rand(10, 10, 3, 2)) @test gradtest(x -> maxpool(x, (2,2,2)), rand(10, 10, 10, 3, 2)) @@ -213,7 +216,7 @@ end @test @sprintf("%.2f", sum(param([1,2,3]))) == "6.00" -@inferred NNlib.conv(param(rand(10,10,3,2)),randn(2,2,3,4)) +@inferred NNlib.conv(param(rand(10,10,3,2)),randn(Float64,2,2,3,4)) b = param(rand()) Tracker.back!(b) From 8ab209126d9f8478dd7fb8ab351f37b0bab22a60 Mon Sep 17 00:00:00 2001 From: pevnak Date: Tue, 17 Jul 2018 17:13:55 +0200 Subject: [PATCH 025/121] removed zeros fix --- src/layers/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 8f9da6ff..d461c95c 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -101,7 +101,7 @@ struct Diagonal{T} β::T end -Diagonal(in::Integer; initα = ones, initβ = (x) -> similar(x) .= 0) = +Diagonal(in::Integer; initα = ones, initβ = zeros) = Diagonal(param(initα(in)), param(initβ(in))) @treelike Diagonal From d6f5baee398f2e71c5bb9d0c904617bb156639e9 Mon Sep 17 00:00:00 2001 From: pevnak Date: Wed, 18 Jul 2018 07:51:40 +0200 Subject: [PATCH 026/121] fixed fixes proposed by Carlo --- src/tracker/array.jl | 2 +- test/tracker.jl | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index e1e17382..a3cbc78a 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -27,7 +27,7 @@ TrackedArray(c::Call, x::A) where A <: AbstractArray = TrackedArray(c::Call, x::A, Δ::A) where A <: AbstractArray = TrackedArray{eltype(A),ndims(A),A}(Tracked{A}(c, Δ), x, Δ) -TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, similar(x) .= 0) +TrackedArray(x::AbstractArray) = TrackedArray(Call(), x, zero(x)) Base.eltype(x::Type{<:TrackedArray{T}}) where T <: Real = TrackedReal{T} diff --git a/test/tracker.jl b/test/tracker.jl index e27f4615..745329f3 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -14,13 +14,13 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...) 
@test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2) -@test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10)) -@test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5)) +@test gradtest((w, x) -> w'*x, randn(10, 2), randn(10)) +@test gradtest((w, x) -> w*x', randn(5,5), randn(5,5)) @test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5)) -@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) -@test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) -@test gradtest(x -> sum(x), randn(Float64,2,3)) +@test gradtest(x -> sum(x, dims = 1), randn(2,3)) +@test gradtest(x -> sum(x, dims = [1,2]), randn(2,3)) +@test gradtest(x -> sum(x), randn(2,3)) @test gradtest(x -> prod(x, (2, 3)), (3,4,5)) @test gradtest(x -> prod(x), (3,4,5)) @@ -170,9 +170,9 @@ end 2y + x end -@test gradtest(conv, rand(10, 3, 2), randn(Float64,2, 3, 2)) -@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64,2, 2, 3, 2)) -@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64,2, 2, 2, 3, 2)) +@test gradtest(conv, rand(10, 3, 2), randn(2, 3, 2)) +@test gradtest(conv, rand(10, 10, 3, 2), randn(2, 2, 3, 2)) +@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(2, 2, 2, 3, 2)) @test gradtest(x -> maxpool(x, (2,2)), rand(10, 10, 3, 2)) @test gradtest(x -> maxpool(x, (2,2,2)), rand(10, 10, 10, 3, 2)) @@ -216,7 +216,7 @@ end @test @sprintf("%.2f", sum(param([1,2,3]))) == "6.00" -@inferred NNlib.conv(param(rand(10,10,3,2)),randn(Float64,2,2,3,4)) +@inferred NNlib.conv(param(rand(10,10,3,2)),randn(2,2,3,4)) b = param(rand()) Tracker.back!(b) From ea38c7dbeaa34feb7d7e00291f484201dd8b817d Mon Sep 17 00:00:00 2001 From: pevnak Date: Wed, 18 Jul 2018 08:41:10 +0200 Subject: [PATCH 027/121] some more changes --- src/tracker/array.jl | 2 +- src/tracker/numeric.jl | 2 +- test/tracker.jl | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index a3cbc78a..4546cdc3 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -221,7 +221,7 @@ Base.prod(xs::TrackedArray) = track(prod, xs) Base.prod(f::Union{Function, Type}, xs::TrackedArray) = prod(f.(xs)) @grad prod(xs) = prod(data(xs)), Δ -> (prod(xs) ./ xs .* Δ,) -@grad prod(xs, dim) = prod(data(xs), dim), +@grad prod(xs, dim) = prod(data(xs), dims = dim), Δ -> (nobacksies(:sum, reshape(.*(circshift.([reshape(data(xs), length(xs))], 1:length(xs)-1)...), size(xs)) .* Δ), nothing) diff --git a/src/tracker/numeric.jl b/src/tracker/numeric.jl index 1ad872e4..112117ed 100644 --- a/src/tracker/numeric.jl +++ b/src/tracker/numeric.jl @@ -1,5 +1,5 @@ function ngradient(f, xs::AbstractArray...) - grads = zeros.(xs) + grads = zero.(xs) for (x, Δ) in zip(xs, grads), i in 1:length(x) δ = sqrt(eps()) tmp = x[i] diff --git a/test/tracker.jl b/test/tracker.jl index 745329f3..18479485 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -2,7 +2,7 @@ using Flux using Flux.Tracker, Base.Test, NNlib using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv -using StatsBase +# using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) gradtest(f, dims...) = gradtest(f, rand.(dims)...) @@ -14,13 +14,13 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...) 
@test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2) -@test gradtest((w, x) -> w'*x, randn(10, 2), randn(10)) -@test gradtest((w, x) -> w*x', randn(5,5), randn(5,5)) +@test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10)) +@test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5)) @test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5)) -@test gradtest(x -> sum(x, dims = 1), randn(2,3)) -@test gradtest(x -> sum(x, dims = [1,2]), randn(2,3)) -@test gradtest(x -> sum(x), randn(2,3)) +@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) +@test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) +@test gradtest(x -> sum(x), randn(Float64,2,3)) @test gradtest(x -> prod(x, (2, 3)), (3,4,5)) @test gradtest(x -> prod(x), (3,4,5)) @@ -170,9 +170,9 @@ end 2y + x end -@test gradtest(conv, rand(10, 3, 2), randn(2, 3, 2)) -@test gradtest(conv, rand(10, 10, 3, 2), randn(2, 2, 3, 2)) -@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(2, 2, 2, 3, 2)) +@test gradtest(conv, rand(10, 3, 2), randn(Float64,2, 3, 2)) +@test gradtest(conv, rand(10, 10, 3, 2), randn(Float64,2, 2, 3, 2)) +@test gradtest(conv, rand(10, 10, 10, 3, 2), randn(Float64,2, 2, 2, 3, 2)) @test gradtest(x -> maxpool(x, (2,2)), rand(10, 10, 3, 2)) @test gradtest(x -> maxpool(x, (2,2,2)), rand(10, 10, 10, 3, 2)) @@ -216,7 +216,7 @@ end @test @sprintf("%.2f", sum(param([1,2,3]))) == "6.00" -@inferred NNlib.conv(param(rand(10,10,3,2)),randn(2,2,3,4)) +@inferred NNlib.conv(param(rand(10,10,3,2)),randn(Float64,2,2,3,4)) b = param(rand()) Tracker.back!(b) From 3510c837a842f023ebf9e6f7e739a0b4fe699c31 Mon Sep 17 00:00:00 2001 From: pevnak Date: Wed, 18 Jul 2018 09:01:06 +0200 Subject: [PATCH 028/121] zeros replaced by zero --- src/Flux.jl | 2 +- src/cuda/cudnn.jl | 6 +++--- src/data/cmudict.jl | 8 ++++---- src/layers/conv.jl | 2 +- src/layers/normalise.jl | 2 +- src/layers/recurrent.jl | 6 +++--- src/optimise/Optimise.jl | 2 +- src/optimise/optimisers.jl | 30 +++++++++++++++--------------- src/utils.jl | 2 +- test/layers/normalisation.jl | 2 +- test/runtests.jl | 2 +- test/tracker.jl | 2 +- 12 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 3b7f22b2..239a561d 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -4,7 +4,7 @@ module Flux # Zero Flux Given -using MacroTools, Juno, Requires, Reexport, StatsBase +using MacroTools, Juno, Requires, Reexport, StatsBase, Random using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index fe1b76d2..ba6c8fd7 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -83,7 +83,7 @@ function RNNDesc{T}(mode::Int, input::Int, hidden::Int; layers = 1) where T @check ccall((:cudnnSetRNNDescriptor_v6,libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Cint,Cint,Ptr{Nothing},Cint,Cint,Cint,Cint,Cint), libcudnn_handle[],d[],hidden,layers,dropoutDesc,inputMode,direction,mode,algo,cudnnDataType(T)) - w = cuzeros(T, rnnParamSize(T, d[], input)) + w = cuzero(T, rnnParamSize(T, d[], input)) # TODO: avoid reserve allocation here rd = RNNDesc{T}(mode, input, hidden, w, params(w, input, hidden, ngates(mode))..., d[]) finalizer(rd, x -> @@ -198,7 +198,7 @@ end function backwardData(rnn::RNNDesc{T}, y, dy_, dho, dco, h, c, reserve) where T # Same as above, any more efficient way? - dy = dy_ isa Integer ? zeros(y) : dy_ + dy = dy_ isa Integer ? zero(y) : dy_ yd = xDesc(y) dx = y isa AbstractVector ? 
similar(dy, rnn.input) : similar(dy, rnn.input, size(dy, 2)) dh = similar(h) @@ -229,7 +229,7 @@ function cudnnRNNBackwardWeights(rnn::RNNDesc{T}, seqlen, xd, x, hd, h, yd, y, d end function backwardWeights(rnn::RNNDesc{T}, x, h, y, reserve) where T - dw = zeros(rnn.params) + dw = zero(rnn.params) cudnnRNNBackwardWeights(rnn, 1, xDesc(x), x, hDesc(h)..., xDesc(y), y, FilterDesc(T, (1, 1, length(dw))), dw, diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl index b3257734..502a9670 100644 --- a/src/data/cmudict.jl +++ b/src/data/cmudict.jl @@ -24,23 +24,23 @@ end function phones() load() - Symbol.(first.(split.(split(readstring(deps("cmudict", "cmudict.phones")), + Symbol.(first.(split.(split(read(deps("cmudict", "cmudict.phones"),String), "\n", keep = false), "\t"))) end function symbols() load() - Symbol.(split(readstring(deps("cmudict", "cmudict.symbols")), + Symbol.(split(read(deps("cmudict", "cmudict.symbols"),String), "\n", keep = false)) end function rawdict() load() Dict(String(xs[1]) => Symbol.(xs[2:end]) for xs in - filter(!isempty, split.(split(readstring(deps("cmudict", "cmudict")), "\n")))) + filter(!isempty, split.(split(read(deps("cmudict", "cmudict"),String), "\n")))) end -validword(s) = isascii(s) && ismatch(r"^[\w\-\.]+$", s) +validword(s) = isascii(s) && occursin(r"^[\w\-\.]+$", s) cmudict() = filter((s, ps) -> validword(s), rawdict()) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 3a6592ed..e73e582f 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -32,7 +32,7 @@ Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, stride = 1, pad = 0, dilation = 1) where N = - Conv(param(init(k..., ch...)), param(zeros(ch[2])), σ, + Conv(param(init(k..., ch...)), param(zero(ch[2])), σ, stride = stride, pad = pad, dilation = dilation) @treelike Conv diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 8c3f3462..6e739a52 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -110,7 +110,7 @@ end BatchNorm(chs::Integer, λ = identity; initβ = zeros, initγ = ones, ϵ = 1e-8, momentum = .1) = BatchNorm(λ, param(initβ(chs)), param(initγ(chs)), - zeros(chs), ones(chs), ϵ, momentum, true) + zero(chs), ones(chs), ϵ, momentum, true) function (BN::BatchNorm)(x) size(x, ndims(x)-1) == length(BN.β) || diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 23a9fad1..969a777e 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -84,7 +84,7 @@ end RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) = RNNCell(σ, param(init(out, in)), param(init(out, out)), - param(zeros(out)), param(initn(out))) + param(zero(out)), param(initn(out))) function (m::RNNCell)(h, x) σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b @@ -122,7 +122,7 @@ end function LSTMCell(in::Integer, out::Integer; init = glorot_uniform) - cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zeros(out*4)), + cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zero(out*4)), param(initn(out)), param(initn(out))) cell.b.data[gate(out, 2)] = 1 return cell @@ -170,7 +170,7 @@ end GRUCell(in, out; init = glorot_uniform) = GRUCell(param(init(out*3, in)), param(init(out*3, out)), - param(zeros(out*3)), param(initn(out))) + param(zero(out*3)), param(initn(out))) function (m::GRUCell)(h, x) b, o = m.b, size(h, 1) diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index 810793b6..6765a778 100644 --- 
a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -9,7 +9,7 @@ struct Param{T} Δ::T end -Base.convert(::Type{Param}, x::AbstractArray) = Param(x, zeros(x)) +Base.convert(::Type{Param}, x::AbstractArray) = Param(x, zero(x)) include("optimisers.jl") include("interface.jl") diff --git a/src/optimise/optimisers.jl b/src/optimise/optimisers.jl index 112aaa73..1f7a7c9c 100644 --- a/src/optimise/optimisers.jl +++ b/src/optimise/optimisers.jl @@ -14,7 +14,7 @@ function descentweightdecay(p::Param, η::Real, γ::Real) end function momentum(p::Param, ρ, η) - v = zeros(p.x) + v = zero(p.x) function () @. v = ρ * v - η * p.Δ @. p.Δ = -v @@ -23,7 +23,7 @@ end # Ref. https://arxiv.org/pdf/1212.0901.pdf function nesterov(p::Param, ρ, η) - v = zeros(p.x) + v = zero(p.x) function () d = @. ρ^2 * v - (1+ρ) * η * p.Δ @. v = ρ*v - η*p.Δ @@ -32,7 +32,7 @@ function nesterov(p::Param, ρ, η) end function rmsprop(p::Param; η::Real = 0.001, ρ::Real = 0.9, ϵ::Real = 1e-8) - acc = zeros(p.x) + acc = zero(p.x) function () @. acc = ρ * acc + (1 - ρ) * p.Δ^2 @. p.Δ *= η / √(acc + ϵ) @@ -40,7 +40,7 @@ function rmsprop(p::Param; η::Real = 0.001, ρ::Real = 0.9, ϵ::Real = 1e-8) end function adagrad(p::Param; η::Real = 0.01, ϵ::Real = 1e-8) - acc = zeros(p.x) .+ ϵ + acc = zero(p.x) .+ ϵ function () @. acc += p.Δ^2 @. p.Δ *= η / √(acc + ϵ) @@ -48,8 +48,8 @@ function adagrad(p::Param; η::Real = 0.01, ϵ::Real = 1e-8) end function adadelta(p::Param; ρ::Real = 0.9, ϵ::Real = 1e-8) - acc = zeros(p.x) - Δacc = zeros(p.x) + acc = zero(p.x) + Δacc = zero(p.x) function () @. acc = ρ * acc + (1 - ρ) * p.Δ^2 @. p.Δ *= √(Δacc + ϵ) / √(acc + ϵ) @@ -58,8 +58,8 @@ function adadelta(p::Param; ρ::Real = 0.9, ϵ::Real = 1e-8) end function adam(p::Param; η::Real = 0.001, β1::Real = 0.9, β2::Real = 0.999, ϵ::Real = 1e-8) - mt = zeros(p.x) - vt = zeros(p.x) + mt = zero(p.x) + vt = zero(p.x) β1p, β2p = β1, β2 function () @. mt = β1 * mt + (1 - β1) * p.Δ @@ -71,8 +71,8 @@ function adam(p::Param; η::Real = 0.001, β1::Real = 0.9, β2::Real = 0.999, ϵ end function adamax(p::Param; η::Real = 0.002, β1::Real = 0.9, β2::Real = 0.999, ϵ::Real = 1e-8) - mt = zeros(p.x) - ut = zeros(p.x) + mt = zero(p.x) + ut = zero(p.x) β1p = β1 function () @. mt = β1 * mt + (1 - β1) * p.Δ @@ -83,9 +83,9 @@ function adamax(p::Param; η::Real = 0.002, β1::Real = 0.9, β2::Real = 0.999, end function amsgrad(p::Param; η::Real = 0.001, β1::Real = 0.9, β2::Real = 0.999, ϵ::Real = 1e-8) - mt = zeros(p.x) - vt = zeros(p.x) .+ ϵ - v̂t = zeros(p.x) .+ ϵ + mt = zero(p.x) + vt = zero(p.x) .+ ϵ + v̂t = zero(p.x) .+ ϵ function () @. mt = β1 * mt + (1 - β1) * p.Δ @. vt = β2 * vt + (1 - β2) * p.Δ ^ 2 @@ -95,8 +95,8 @@ function amsgrad(p::Param; η::Real = 0.001, β1::Real = 0.9, β2::Real = 0.999, end function nadam(p::Param; η::Real = 0.001, β1::Real = 0.9, β2::Real = 0.999, ϵ::Real = 1e-8) - mt = zeros(p.x) - vt = zeros(p.x) + mt = zero(p.x) + vt = zero(p.x) β1p, β2p = β1, β2 function () @. mt = β1 * mt + (1 - β1) * p.Δ diff --git a/src/utils.jl b/src/utils.jl index c187401d..c53f7864 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -145,7 +145,7 @@ function jacobian(m,x) y = m(xp) k = length(y) n = length(x) - J = Matrix{eltype(x)}(n,k) + J = Matrix{eltype(x)}(undef,n,k) for i = 1:k Flux.back!(y[i]) # Populate gradient accumulator J[:,i] = xp.grad diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index 0fdb1021..a7a7ada2 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -4,7 +4,7 @@ using Flux: testmode! x = [1.,2.,3.] 
@test x == testmode!(Dropout(0.1))(x) @test x == Dropout(0)(x) - @test zeros(x) == Dropout(1)(x) + @test zero(x) == Dropout(1)(x) x = rand(100) m = Dropout(0.9) diff --git a/test/runtests.jl b/test/runtests.jl index 1e46c926..6d698784 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -using Flux, Base.Test +using Flux, Test, Random srand(0) diff --git a/test/tracker.jl b/test/tracker.jl index 18479485..812bd5aa 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -99,7 +99,7 @@ end @test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1)) @testset "promotiontest" begin - @testset for fcat in [hcat, vcat, (x...) -> cat(3, x...), (x...) -> cat((1,2), x...)] + @testset for fcat in [hcat, vcat, (x...) -> cat(x..., dims = 3), (x...) -> cat(x..., dims = (1,2))] promotiontest(fcat, rand(2), rand(2), rand(2)) promotiontest(fcat, rand(2)', rand(2)', rand(2)') promotiontest(fcat, rand(2,2), rand(2,2), rand(2,2)) From 0471c489e6b66b6d1a2f9b0e6af97f68d48a40b3 Mon Sep 17 00:00:00 2001 From: Simon Mandlik Date: Wed, 18 Jul 2018 15:39:20 +0200 Subject: [PATCH 029/121] depwarns --- src/data/cmudict.jl | 6 +++--- src/onehot.jl | 4 ++-- src/tracker/array.jl | 6 +++--- src/tracker/back.jl | 2 +- test/cuda/cuda.jl | 2 +- test/cuda/cudnn.jl | 2 +- test/data.jl | 2 +- test/layers/stateless.jl | 2 +- test/tracker.jl | 9 ++++++--- test/utils.jl | 1 + 10 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl index 502a9670..3ac0634d 100644 --- a/src/data/cmudict.jl +++ b/src/data/cmudict.jl @@ -25,13 +25,13 @@ end function phones() load() Symbol.(first.(split.(split(read(deps("cmudict", "cmudict.phones"),String), - "\n", keep = false), "\t"))) + "\n", keepempty = false), "\t"))) end function symbols() load() Symbol.(split(read(deps("cmudict", "cmudict.symbols"),String), - "\n", keep = false)) + "\n", keepempty = false)) end function rawdict() @@ -42,7 +42,7 @@ end validword(s) = isascii(s) && occursin(r"^[\w\-\.]+$", s) -cmudict() = filter((s, ps) -> validword(s), rawdict()) +cmudict() = filter(p -> validword(p.first), rawdict()) alphabet() = ['A':'Z'..., '0':'9'..., '_', '-', '.'] diff --git a/src/onehot.jl b/src/onehot.jl index f19fa6f8..dfc632a1 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -39,13 +39,13 @@ adapt(T, xs::OneHotMatrix) = OneHotMatrix(xs.height, adapt(T, xs.data)) end function onehot(l, labels) - i = findfirst(labels, l) + i = something(findfirst(isequal(l), labels), 0) i > 0 || error("Value $l is not in labels") OneHotVector(i, length(labels)) end function onehot(l, labels, unk) - i = findfirst(labels, l) + i = something(findfirst(isequal(l), labels), 0) i > 0 || return onehot(unk, labels) OneHotVector(i, length(labels)) end diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 4546cdc3..e99b39a7 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -2,7 +2,7 @@ import Base: *, == import LinearAlgebra using Statistics -using LinearAlgebra: Transpose, Adjoint, diagm +using LinearAlgebra: Transpose, Adjoint, diagm, diag struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} tracker::Tracked{A} @@ -94,7 +94,7 @@ Base.repeat(A::TrackedArray; kw...) = track(repeat, A; kw...) 
S = size(xs) # Loop through each element of Δ, calculate source dimensions, accumulate into Δ′ - for (dest_idx, val) in enumerate(IndexCartesian(), data(Δ)) + for (dest_idx, val) in pairs(IndexCartesian(), data(Δ)) # First, round dest_idx[dim] to nearest gridpoint defined by inner[dim], then # wrap around based on original size S. src_idx = [mod1(div(dest_idx[dim] - 1, inner[dim]) + 1, S[dim]) for dim in 1:length(S)] @@ -256,7 +256,7 @@ LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 @grad mean(xs) = mean(data(xs)), Δ -> (Δ / length(xs),) -@grad mean(xs, region) = mean(data(xs), region), Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)),nothing) +@grad mean(xs, region) = mean(data(xs), dims = region), Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)),nothing) @grad function maximum(xs, r...) maximum(data(xs), r...), function (Δ) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 9ca0377c..04f5c231 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -96,7 +96,7 @@ end @forward Grads.grads Base.setindex!, Base.haskey -accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] + Δ : Δ +accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] .+ Δ : Δ function back_(g::Grads, c::Call, Δ) Δs = c.func(Δ) diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index d16ce8f2..f515a2bc 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -1,4 +1,4 @@ -using Flux, Flux.Tracker, CuArrays, Base.Test +using Flux, Flux.Tracker, CuArrays, Test using Flux: gpu info("Testing Flux/GPU") diff --git a/test/cuda/cudnn.jl b/test/cuda/cudnn.jl index 91b5b972..c67fc060 100644 --- a/test/cuda/cudnn.jl +++ b/test/cuda/cudnn.jl @@ -1,4 +1,4 @@ -using Flux, CuArrays, Base.Test +using Flux, CuArrays, Test info("Testing Flux/CUDNN") diff --git a/test/data.jl b/test/data.jl index 5a4c9ce6..e3c3de7a 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,5 +1,5 @@ using Flux.Data -using Base.Test +using Test @test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 31a67aa7..7c1d3efa 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -1,4 +1,4 @@ -using Base.Test +using Test using Flux: onehotbatch, mse, crossentropy, logitcrossentropy, σ, binarycrossentropy, logitbinarycrossentropy diff --git a/test/tracker.jl b/test/tracker.jl index 812bd5aa..3cec52f1 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -1,7 +1,10 @@ using Flux -using Flux.Tracker, Base.Test, NNlib +using Flux.Tracker, Test, NNlib using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv +using Printf: @sprintf +using LinearAlgebra: diagm, dot, LowerTriangular, norm +using Statistics: mean # using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) @@ -110,7 +113,7 @@ end promotiontest(hcat, rand(2,1), rand(2), rand(2,2)) promotiontest(vcat, rand(3,4,5), rand(1,4,5), rand(2,4,5)) promotiontest(hcat, rand(4,3,5), rand(4,1,5), rand(4,2,5)) - promotiontest((x...) -> cat(3, x...), rand(4,5,3), rand(4,5,1), rand(4,5,2)) + promotiontest((x...) 
-> cat(x..., dims = 3), rand(4,5,3), rand(4,5,1), rand(4,5,2)) end end @@ -163,7 +166,7 @@ end @test gradtest((x, y) -> x .* y, rand(5), rand(5)) @test gradtest(dot, rand(5), rand(5)) -@test gradtest(vecnorm, rand(5)) +@test gradtest(norm, rand(5)) @test gradtest(rand(5)) do x y = x.^2 diff --git a/test/utils.jl b/test/utils.jl index bf3dd30a..6fb28e31 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,5 +1,6 @@ using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian using StatsBase: std +using Dates @testset "Throttle" begin @testset "default behaviour" begin From 02f343d44da570a814ea854778d9c9d57e4319d7 Mon Sep 17 00:00:00 2001 From: Simon Mandlik Date: Wed, 18 Jul 2018 20:20:00 +0200 Subject: [PATCH 030/121] fixed more dep warns, also in tests, but maximum, minimum and size in array.jl still need to be updated. As a result, some more tests may not pass for the time being --- src/tracker/array.jl | 5 +++-- test/tracker.jl | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index e99b39a7..7c23288f 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -252,11 +252,11 @@ StatsBase.std(x::TrackedArray; mean = Statistics.mean(x)) = StatsBase.std(x::TrackedArray, dim; mean = Statistics.mean(x, dim)) = sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1)) -LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = +LinearAlgebra.norm(x::TrackedArray, p::Real = 2) = sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 @grad mean(xs) = mean(data(xs)), Δ -> (Δ / length(xs),) -@grad mean(xs, region) = mean(data(xs), dims = region), Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)),nothing) +@grad mean(xs, region) = mean(data(xs), dims=region), Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)),nothing) @grad function maximum(xs, r...) maximum(data(xs), r...), function (Δ) @@ -266,6 +266,7 @@ LinearAlgebra.vecnorm(x::TrackedArray, p::Real = 2) = return (nobacksies(:maximum, Δ′),map(_->nothing,r)...) end end + @grad function minimum(xs, r...) minimum(data(xs), r...), function (Δ) Δ′ = zero(xs) diff --git a/test/tracker.jl b/test/tracker.jl index 3cec52f1..44533b1a 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -24,7 +24,7 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...) 
@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) @test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) @test gradtest(x -> sum(x), randn(Float64,2,3)) -@test gradtest(x -> prod(x, (2, 3)), (3,4,5)) +@test gradtest(x -> prod(x, dims=(2, 3)), (3,4,5)) @test gradtest(x -> prod(x), (3,4,5)) @test gradtest(x -> softmax(x).*(1:3), 3) @@ -133,31 +133,31 @@ end @testset "mean" begin @test gradtest(mean, rand(2, 3)) - @test gradtest(x -> mean(x, 1), rand(2, 3)) - @test gradtest(x -> mean(x, 2), rand(2, 3)) - @test gradtest(x -> mean(x, 3), rand(2, 3, 4)) + @test gradtest(x -> mean(x, dims=1), rand(2, 3)) + @test gradtest(x -> mean(x, dims=2), rand(2, 3)) + @test gradtest(x -> mean(x, dims=3), rand(2, 3, 4)) - @test gradtest(x -> mean(x, [1, 2]), rand(2, 3, 4)) + @test gradtest(x -> mean(x, dims=[1, 2]), rand(2, 3, 4)) end @testset "maximum" begin @test gradtest(maximum, rand(2, 3)) - @test gradtest(x -> maximum(x, 1), rand(2, 3)) - @test gradtest(x -> maximum(x, 2), rand(2, 3)) - @test gradtest(x -> maximum(x, 3), rand(2, 3, 4)) + @test gradtest(x -> maximum(x, dims=1), rand(2, 3)) + @test gradtest(x -> maximum(x, dims=2), rand(2, 3)) + @test gradtest(x -> maximum(x, dims=3), rand(2, 3, 4)) - @test gradtest(x -> maximum(x, [1, 2]), rand(2, 3, 4)) + @test gradtest(x -> maximum(x, dims=[1, 2]), rand(2, 3, 4)) end @testset "minimum" begin @test gradtest(minimum, rand(2, 3)) - @test gradtest(x -> minimum(x, 1), rand(2, 3)) - @test gradtest(x -> minimum(x, 2), rand(2, 3)) - @test gradtest(x -> minimum(x, 3), rand(2, 3, 4)) + @test gradtest(x -> minimum(x, dims=1), rand(2, 3)) + @test gradtest(x -> minimum(x, dims=2), rand(2, 3)) + @test gradtest(x -> minimum(x, dims=3), rand(2, 3, 4)) - @test gradtest(x -> minimum(x, [1, 2]), rand(2, 3, 4)) + @test gradtest(x -> minimum(x, dims=[1, 2]), rand(2, 3, 4)) end @test gradtest(x -> std(x), rand(5,5)) From c657d4e47f5e5eef8adcf4d2e4ea9bbded744e9b Mon Sep 17 00:00:00 2001 From: pevnak Date: Thu, 19 Jul 2018 09:44:15 +0200 Subject: [PATCH 031/121] fixed the sum as suggested by mike --- src/layers/recurrent.jl | 2 +- src/tracker/array.jl | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 969a777e..15a590b7 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -84,7 +84,7 @@ end RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) = RNNCell(σ, param(init(out, in)), param(init(out, out)), - param(zero(out)), param(initn(out))) + param(fill(0.0,out)), param(initn(out))) function (m::RNNCell)(h, x) σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 7c23288f..818e5e73 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -205,16 +205,16 @@ Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) # Reductions -Base.sum(xs::TrackedArray; dims) = track(sum, xs, dims) -Base.sum(xs::TrackedArray) = track(sum, xs) +Base.sum(xs::TrackedArray; dims = :) = track(sum, xs, dims = dims) +# Base.sum(xs::TrackedArray) = track(sum, xs) Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs)) -@grad sum(xs, dims::Int) = sum(data(xs), dims = dims), - Δ -> (zero(xs) .+ Δ, nothing) -@grad sum(xs, dims) = sum(data(xs), dims = dims), - Δ -> (zero(xs) .+ Δ, map(_->nothing,dims)...) 
-@grad sum(xs) = sum(data(xs)), - Δ -> (zero(xs) .+ Δ,) +# @grad sum(xs, dims::Int) = sum(data(xs), dims = dims), + # Δ -> (zero(xs) .+ Δ, nothing) +@grad sum(xs; dims = :) = sum(data(xs), dims = dims), + Δ -> (zero(xs) .+ Δ, ) +# @grad sum(xs) = sum(data(xs)), + # Δ -> (zero(xs) .+ Δ,) Base.prod(xs::TrackedArray, dim) = track(prod, xs, dim) Base.prod(xs::TrackedArray) = track(prod, xs) From 926411a449fc0115b79c4e89869cd515d7e06a0b Mon Sep 17 00:00:00 2001 From: pevnak Date: Thu, 19 Jul 2018 10:58:43 +0200 Subject: [PATCH 032/121] removed most error, the only one in Fallbacks test persits --- src/Flux.jl | 2 +- src/layers/normalise.jl | 4 ++-- src/tracker/array.jl | 44 ++++++++++++++++------------------------- test/tracker.jl | 4 ++-- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 239a561d..0989aa9c 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -4,7 +4,7 @@ module Flux # Zero Flux Given -using MacroTools, Juno, Requires, Reexport, StatsBase, Random +using MacroTools, Juno, Requires, Reexport, Statistics, Random using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 6e739a52..5e5b9b4a 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -108,9 +108,9 @@ mutable struct BatchNorm{F,V,W,N} end BatchNorm(chs::Integer, λ = identity; - initβ = zeros, initγ = ones, ϵ = 1e-8, momentum = .1) = + initβ = (i) -> fill(0.0,i), initγ = (i) -> fill(1.0,i), ϵ = 1e-8, momentum = .1) = BatchNorm(λ, param(initβ(chs)), param(initγ(chs)), - zero(chs), ones(chs), ϵ, momentum, true) + fill(0.0,chs), fill(1.0,chs), ϵ, momentum, true) function (BN::BatchNorm)(x) size(x, ndims(x)-1) == length(BN.β) || diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 818e5e73..20568086 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -206,15 +206,10 @@ Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) # Reductions Base.sum(xs::TrackedArray; dims = :) = track(sum, xs, dims = dims) -# Base.sum(xs::TrackedArray) = track(sum, xs) Base.sum(f::Union{Function,Type},xs::TrackedArray) = sum(f.(xs)) -# @grad sum(xs, dims::Int) = sum(data(xs), dims = dims), - # Δ -> (zero(xs) .+ Δ, nothing) @grad sum(xs; dims = :) = sum(data(xs), dims = dims), Δ -> (zero(xs) .+ Δ, ) -# @grad sum(xs) = sum(data(xs)), - # Δ -> (zero(xs) .+ Δ,) Base.prod(xs::TrackedArray, dim) = track(prod, xs, dim) Base.prod(xs::TrackedArray) = track(prod, xs) @@ -228,13 +223,10 @@ Base.prod(f::Union{Function, Type}, xs::TrackedArray) = prod(f.(xs)) Base.findfirst(xs::TrackedArray, args...) = findfirst(xs.data, args...) 
-Statistics.mean(xs::TrackedArray) = track(mean, xs) -Statistics.mean(xs::TrackedArray, region) = track(mean, xs, region) +Statistics.mean(xs::TrackedArray; dims = :) = track(mean, xs, dims = dims) -Base.maximum(xs::TrackedArray) = track(maximum, xs) -Base.maximum(xs::TrackedArray, region) = track(maximum, xs, region) -Base.minimum(xs::TrackedArray) = track(minimum, xs) -Base.minimum(xs::TrackedArray, region) = track(minimum, xs, region) +Base.maximum(xs::TrackedArray; dims = :) = track(maximum, xs, dims = dims) +Base.minimum(xs::TrackedArray; dims = :) = track(minimum, xs, dims = dims) import LinearAlgebra: dot @@ -244,35 +236,33 @@ dot(xs::TrackedVector, ys::AbstractVector) = track(dot, xs, ys) @grad dot(xs, ys) = dot(data(xs), data(ys)), Δ -> (Δ .* ys, Δ .* xs) -using StatsBase - # Hacks to get std working -StatsBase.std(x::TrackedArray; mean = Statistics.mean(x)) = - sqrt.(sum((x .- mean).^2) ./ (length(x)-1)) -StatsBase.std(x::TrackedArray, dim; mean = Statistics.mean(x, dim)) = - sqrt.(sum((x .- mean).^2, dim) ./ (size(x, dim)-1)) +Statistics.std(x::TrackedArray; dims = :, mean = Statistics.mean(x, dims = dims)) = _std(x,mean,dims) +_std(x::TrackedArray, mean, dims) = sqrt.(sum((x .- mean).^2, dims = dims) ./ (mapreduce(i -> size(x,i),*, dims) - 1)) +_std(x::TrackedArray, mean, ::Colon) = sqrt.(sum((x .- mean).^2) ./ (length(x) - 1)) LinearAlgebra.norm(x::TrackedArray, p::Real = 2) = sum(abs.(x).^p .+ eps(0f0))^(1/p) # avoid d(sqrt(x))/dx == Inf at 0 -@grad mean(xs) = mean(data(xs)), Δ -> (Δ / length(xs),) -@grad mean(xs, region) = mean(data(xs), dims=region), Δ -> (zero(xs) .+ Δ ./ prod(size(xs, region...)),nothing) +@grad mean(xs; dims = :) = mean(data(xs), dims=dims), Δ -> (_backmean(xs,Δ,dims),) +_backmean(xs, Δ, ::Colon) = zero(xs) .+ Δ ./ length(xs) +_backmean(xs, Δ, dims) = zero(xs) .+ Δ ./ mapreduce(i -> size(data(xs),i),*,dims) -@grad function maximum(xs, r...) - maximum(data(xs), r...), function (Δ) +@grad function maximum(xs; dims = dims) + maximum(data(xs), dims = dims), function (Δ) Δ′ = zero(xs) - _, i = findmax(data(xs), r...) + _, i = findmax(data(xs), dims = dims) Δ′[i] = data(Δ) - return (nobacksies(:maximum, Δ′),map(_->nothing,r)...) + return (nobacksies(:maximum, Δ′),) end end -@grad function minimum(xs, r...) - minimum(data(xs), r...), function (Δ) +@grad function minimum(xs; dims = dims) + minimum(data(xs), dims = dims), function (Δ) Δ′ = zero(xs) - _, i = findmin(data(xs), r...) + _, i = findmin(data(xs), dims = dims) Δ′[i] = data(Δ) - return (nobacksies(:minimum, Δ′),map(_->nothing,r)...) + return (nobacksies(:minimum, Δ′),) end end diff --git a/test/tracker.jl b/test/tracker.jl index 44533b1a..900bcd1c 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -4,7 +4,7 @@ using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv using Printf: @sprintf using LinearAlgebra: diagm, dot, LowerTriangular, norm -using Statistics: mean +using Statistics: mean, std # using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) 
@@ -161,7 +161,7 @@ end end @test gradtest(x -> std(x), rand(5,5)) -@test gradtest(x -> std(x, 1), rand(5,5)) +@test gradtest(x -> std(x, dims = 1), rand(5,5)) @test gradtest((x, y) -> x .* y, rand(5), rand(5)) @test gradtest(dot, rand(5), rand(5)) From 7103a0ed7d600d48f9205a396baa89795815e5bc Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 3 Aug 2018 15:19:10 +0100 Subject: [PATCH 033/121] tweaks --- src/cuda/cudnn.jl | 2 +- src/layers/normalise.jl | 4 ++-- src/layers/recurrent.jl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index ba6c8fd7..585b948d 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -83,7 +83,7 @@ function RNNDesc{T}(mode::Int, input::Int, hidden::Int; layers = 1) where T @check ccall((:cudnnSetRNNDescriptor_v6,libcudnn), cudnnStatus_t, (Ptr{Nothing},Ptr{Nothing},Cint,Cint,Ptr{Nothing},Cint,Cint,Cint,Cint,Cint), libcudnn_handle[],d[],hidden,layers,dropoutDesc,inputMode,direction,mode,algo,cudnnDataType(T)) - w = cuzero(T, rnnParamSize(T, d[], input)) + w = cuzeros(T, rnnParamSize(T, d[], input)) # TODO: avoid reserve allocation here rd = RNNDesc{T}(mode, input, hidden, w, params(w, input, hidden, ngates(mode))..., d[]) finalizer(rd, x -> diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 5e5b9b4a..f6297034 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -108,9 +108,9 @@ mutable struct BatchNorm{F,V,W,N} end BatchNorm(chs::Integer, λ = identity; - initβ = (i) -> fill(0.0,i), initγ = (i) -> fill(1.0,i), ϵ = 1e-8, momentum = .1) = + initβ = (i) -> zeros(i), initγ = (i) -> ones(i), ϵ = 1e-8, momentum = .1) = BatchNorm(λ, param(initβ(chs)), param(initγ(chs)), - fill(0.0,chs), fill(1.0,chs), ϵ, momentum, true) + zeros(chs), ones(chs), ϵ, momentum, true) function (BN::BatchNorm)(x) size(x, ndims(x)-1) == length(BN.β) || diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 15a590b7..d97c7fd7 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -84,7 +84,7 @@ end RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) = RNNCell(σ, param(init(out, in)), param(init(out, out)), - param(fill(0.0,out)), param(initn(out))) + param(zeros(out)), param(initn(out))) function (m::RNNCell)(h, x) σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b From 6cdf4ff56adfe01277bd2340161f6d6e55419f6e Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 3 Aug 2018 16:22:54 +0100 Subject: [PATCH 034/121] add statsbase to require --- REQUIRE | 1 + 1 file changed, 1 insertion(+) diff --git a/REQUIRE b/REQUIRE index 8b235c42..df9c6322 100644 --- a/REQUIRE +++ b/REQUIRE @@ -9,6 +9,7 @@ Colors ZipFile AbstractTrees Reexport +StatsBase # AD ForwardDiff 0.5.0 From 62d594af435d6daa54e3475cf9679f85f7af9205 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Tue, 7 Aug 2018 22:09:20 +0100 Subject: [PATCH 035/121] out of place gradients for collect --- src/tracker/scalar.jl | 4 ++++ test/tracker.jl | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/tracker/scalar.jl b/src/tracker/scalar.jl index 50b9c7af..9ff1895a 100644 --- a/src/tracker/scalar.jl +++ b/src/tracker/scalar.jl @@ -115,3 +115,7 @@ end function back_(c::Call{typeof(collect)}, Δ) foreach(back, c.args[1], data(Δ)) end + +function back_(g::Grads, c::Call{typeof(collect)}, Δ) + foreach((x, Δ) -> back(g, x, Δ), c.args[1], Δ) +end diff --git a/test/tracker.jl b/test/tracker.jl index 900bcd1c..d504f0a4 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -232,6 +232,11 @@ Tracker.back!(b) z = xy[1]*xy[2] 
back!(z) @test grad.((x,y)) == (3, 2) + + @test Tracker.gradient(2, 3) do x, y + xy = Tracker.collect([x, y]) + xy[1]*xy[2] + end == (3, 2) end # Gradient Hooks From 5186e3ba186f9df57e34aa16ab2f6acf5d6e44d6 Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 10:51:07 +0100 Subject: [PATCH 036/121] Updates for julia 1.0 --- src/layers/basic.jl | 4 ++-- src/tracker/back.jl | 5 +++-- src/tracker/idset.jl | 8 +++++--- test/runtests.jl | 3 ++- test/tracker.jl | 4 ++-- test/utils.jl | 3 ++- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index d461c95c..f7344484 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -21,8 +21,8 @@ struct Chain Chain(xs...) = new([xs...]) end -@forward Chain.layers Base.getindex, Base.first, Base.last, Base.endof, Base.push! -@forward Chain.layers Base.start, Base.next, Base.done +@forward Chain.layers Base.getindex, Base.first, Base.last, Base.lastindex, Base.push! +@forward Chain.layers Base.iterate children(c::Chain) = c.layers mapchildren(f, c::Chain) = Chain(f.(c.layers)...) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 04f5c231..5c44a15a 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -70,7 +70,7 @@ struct Params Params(xs) = new(IdSet(xs)) end -@forward Params.params Base.start, Base.next, Base.done +@forward Params.params Base.iterate function Base.show(io::IO, ps::Params) print(io, "Params([") @@ -86,6 +86,8 @@ Base.show(io::IO, ps::Grads) = println(io, "Grads(...)") Grads() = Grads(IdDict()) +@forward Grads.grads Base.setindex!, Base.haskey, Base.length, Base.iterate + Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps)) Base.getindex(g::Grads, x::Tracked) = g.grads[x] @@ -94,7 +96,6 @@ function Base.getindex(g::Grads, x) g[tracker(x)] end -@forward Grads.grads Base.setindex!, Base.haskey accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] .+ Δ : Δ diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index 1bbfec09..442d5fa2 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -20,6 +20,8 @@ Base.similar(s::IdSet, T::Type) = IdSet{T}() @forward IdSet.dict Base.length -Base.start(s::IdSet) = start(keys(s.dict)) -Base.next(s::IdSet, st) = next(keys(s.dict), st) -Base.done(s::IdSet, st) = done(keys(s.dict), st) +function iterate(v::IdSet, state...) + y = iterate(keys(v.dict), state...) + y === nothing && return nothing + return (y[1], y[2]) +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 6d698784..fcda4e82 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Flux, Test, Random +using Random -srand(0) +Random.seed!(0) @testset "Flux" begin diff --git a/test/tracker.jl b/test/tracker.jl index d504f0a4..768cc4f7 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -5,13 +5,13 @@ using NNlib: conv using Printf: @sprintf using LinearAlgebra: diagm, dot, LowerTriangular, norm using Statistics: mean, std +using Random # using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) -gradtest(f, dims...) = gradtest(f, rand.(dims)...) +gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...) 
@testset "Tracker" begin - @test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) diff --git a/test/utils.jl b/test/utils.jl index 6fb28e31..5e1b0ef0 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,6 +1,7 @@ using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian using StatsBase: std using Dates +using Random @testset "Throttle" begin @testset "default behaviour" begin @@ -61,7 +62,7 @@ end @testset "Initialization" begin # Set random seed so that these tests don't fail randomly - srand(0) + Random.seed!(0) # initn() should yield a kernel with stddev ~= 1e-2 v = initn(10, 10) @test std(v) > 0.9*1e-2 From d3c78a80be7d4b3dfb7eeb1f1f42e04f10e2ee8c Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sat, 11 Aug 2018 17:20:27 +0530 Subject: [PATCH 037/121] Fix layers errors --- src/layers/normalise.jl | 8 ++++---- test/layers/normalisation.jl | 6 +++--- test/layers/stateless.jl | 4 ++-- test/runtests.jl | 8 +++++++- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index f6297034..164f6fa7 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -130,13 +130,13 @@ function (BN::BatchNorm)(x) ϵ = data(convert(T, BN.ϵ)) axes = [1:dims-2; dims] # axes to reduce along (all but channels axis) - μ = mean(x, axes) - σ = sqrt.(mean((x .- μ).^2, axes) .+ ϵ) + μ = mean(x, dims = axes) + σ = sqrt.(mean((x .- μ).^2, dims = axes) .+ ϵ) # update moving mean/std mtm = data(convert(T, BN.momentum)) - BN.μ = (1 - mtm) .* BN.μ .+ mtm .* squeeze(data(μ), (axes...,)) - BN.σ = (1 - mtm) .* BN.σ .+ mtm .* squeeze(data(σ), (axes...,)) .* m ./ (m - 1) + BN.μ = (1 - mtm) .* BN.μ .+ mtm .* dropdims(data(μ), dims = (axes...,)) + BN.σ = (1 - mtm) .* BN.σ .+ mtm .* dropdims(data(σ), dims = (axes...,)) .* m ./ (m - 1) end let λ = BN.λ diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index a7a7ada2..b17120b0 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -53,17 +53,17 @@ end # .1 * 4 + 0 = .4 @test m.μ ≈ reshape([0.3, 0.4], 2, 1) - # julia> .1 .* std(x, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] + # julia> .1 .* std(x, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] # 2×1 Array{Float64,2}: # 1.14495 # 1.14495 - @test m.σ ≈ .1 .* std(x.data, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] + @test m.σ ≈ .1 .* std(x.data, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] 
testmode!(m) @test !m.active x′ = m(x).data - @test x′[1] ≈ (1 - 0.3) / 1.1449489742783179 + @test x′[1] ≈ (1 .- 0.3) / 1.1449489742783179 end # with activation function diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 7c1d3efa..d4599908 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -42,8 +42,8 @@ const ϵ = 1e-7 logŷ, y = randn(3), rand(3) @testset "binarycrossentropy" begin - @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) - @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(σ.(logŷ))) + @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ)) + @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ))) end @testset "logitbinarycrossentropy" begin diff --git a/test/runtests.jl b/test/runtests.jl index fcda4e82..2ce0e63b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,11 +5,17 @@ Random.seed!(0) @testset "Flux" begin +println("Testing") include("utils.jl") -include("tracker.jl") +# println("Testing") +# include("tracker.jl") +println("Testing") include("layers/normalisation.jl") +println("Testing") include("layers/stateless.jl") +println("Testing") include("optimise.jl") +println("Testing") include("data.jl") # if Base.find_in_path("CuArrays") ≠ nothing From 837e03613f98ff9b949815018cba02a3682dab3c Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 10:51:07 +0100 Subject: [PATCH 038/121] Updates for julia 1.0 --- REQUIRE | 2 +- src/layers/basic.jl | 4 ++-- src/tracker/back.jl | 5 +++-- src/tracker/idset.jl | 8 +++++--- test/runtests.jl | 3 ++- test/tracker.jl | 4 ++-- test/utils.jl | 3 ++- 7 files changed, 17 insertions(+), 12 deletions(-) diff --git a/REQUIRE b/REQUIRE index df9c6322..7164de5a 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,4 @@ -julia 0.7- +julia 0.7 Juno MacroTools 0.3.3 NNlib diff --git a/src/layers/basic.jl b/src/layers/basic.jl index d461c95c..f7344484 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -21,8 +21,8 @@ struct Chain Chain(xs...) = new([xs...]) end -@forward Chain.layers Base.getindex, Base.first, Base.last, Base.endof, Base.push! -@forward Chain.layers Base.start, Base.next, Base.done +@forward Chain.layers Base.getindex, Base.first, Base.last, Base.lastindex, Base.push! +@forward Chain.layers Base.iterate children(c::Chain) = c.layers mapchildren(f, c::Chain) = Chain(f.(c.layers)...) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 04f5c231..774123b4 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -70,7 +70,7 @@ struct Params Params(xs) = new(IdSet(xs)) end -@forward Params.params Base.start, Base.next, Base.done +@forward Params.params Base.iterate, Base.length function Base.show(io::IO, ps::Params) print(io, "Params([") @@ -86,6 +86,8 @@ Base.show(io::IO, ps::Grads) = println(io, "Grads(...)") Grads() = Grads(IdDict()) +@forward Grads.grads Base.setindex!, Base.haskey, Base.length, Base.iterate + Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps)) Base.getindex(g::Grads, x::Tracked) = g.grads[x] @@ -94,7 +96,6 @@ function Base.getindex(g::Grads, x) g[tracker(x)] end -@forward Grads.grads Base.setindex!, Base.haskey accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? 
g[x] .+ Δ : Δ diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index 1bbfec09..62d5190e 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -20,6 +20,8 @@ Base.similar(s::IdSet, T::Type) = IdSet{T}() @forward IdSet.dict Base.length -Base.start(s::IdSet) = start(keys(s.dict)) -Base.next(s::IdSet, st) = next(keys(s.dict), st) -Base.done(s::IdSet, st) = done(keys(s.dict), st) +function Base.iterate(v::IdSet, state...) + y = Base.iterate(keys(v.dict), state...) + y === nothing && return nothing + return (y[1], y[2]) +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 6d698784..fcda4e82 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Flux, Test, Random +using Random -srand(0) +Random.seed!(0) @testset "Flux" begin diff --git a/test/tracker.jl b/test/tracker.jl index d504f0a4..768cc4f7 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -5,13 +5,13 @@ using NNlib: conv using Printf: @sprintf using LinearAlgebra: diagm, dot, LowerTriangular, norm using Statistics: mean, std +using Random # using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) -gradtest(f, dims...) = gradtest(f, rand.(dims)...) +gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...) @testset "Tracker" begin - @test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) diff --git a/test/utils.jl b/test/utils.jl index 6fb28e31..5e1b0ef0 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,6 +1,7 @@ using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian using StatsBase: std using Dates +using Random @testset "Throttle" begin @testset "default behaviour" begin @@ -61,7 +62,7 @@ end @testset "Initialization" begin # Set random seed so that these tests don't fail randomly - srand(0) + Random.seed!(0) # initn() should yield a kernel with stddev ~= 1e-2 v = initn(10, 10) @test std(v) > 0.9*1e-2 From 5db7a3a3ad8f805cd1f7c84369404b43dda678f7 Mon Sep 17 00:00:00 2001 From: Avik Pal Date: Sat, 11 Aug 2018 18:23:47 +0530 Subject: [PATCH 039/121] Fix Optimizers --- src/optimise/Optimise.jl | 5 +++-- src/tracker/idset.jl | 7 +------ test/runtests.jl | 14 ++++---------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index 6765a778..ee7723bc 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -9,7 +9,7 @@ struct Param{T} Δ::T end -Base.convert(::Type{Param}, x::AbstractArray) = Param(x, zero(x)) +Param(x::AbstractArray) = Param(x, zero(x)) include("optimisers.jl") include("interface.jl") @@ -17,6 +17,7 @@ include("train.jl") using Flux.Tracker: TrackedArray -Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) +Param(x::TrackedArray) = Param(x.data, x.grad) +# Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) end diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index 940db15d..d1c507b4 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -20,13 +20,8 @@ Base.similar(s::IdSet, T::Type) = IdSet{T}() @forward IdSet.dict Base.length -<<<<<<< HEAD -function iterate(v::IdSet, state...) - y = iterate(keys(v.dict), state...) -======= function Base.iterate(v::IdSet, state...) y = Base.iterate(keys(v.dict), state...) 
->>>>>>> 837e03613f98ff9b949815018cba02a3682dab3c y === nothing && return nothing return (y[1], y[2]) -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index 2ce0e63b..15f59459 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,21 +5,15 @@ Random.seed!(0) @testset "Flux" begin -println("Testing") include("utils.jl") -# println("Testing") -# include("tracker.jl") -println("Testing") +include("tracker.jl") include("layers/normalisation.jl") -println("Testing") include("layers/stateless.jl") -println("Testing") include("optimise.jl") -println("Testing") include("data.jl") -# if Base.find_in_path("CuArrays") ≠ nothing -# include("cuda/cuda.jl") -# end +if Base.find_in_path("CuArrays") ≠ nothing + include("cuda/cuda.jl") +end end From 89881a9b21b4ab05ec903229bfd9fb6c67698885 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Sat, 11 Aug 2018 18:24:59 +0530 Subject: [PATCH 040/121] utils errors fixed --- test/optimise.jl | 2 +- test/utils.jl | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/optimise.jl b/test/optimise.jl index c896bb39..7e99c294 100644 --- a/test/optimise.jl +++ b/test/optimise.jl @@ -1,6 +1,6 @@ using Flux.Optimise using Flux.Tracker - +using Test @testset "Optimise" begin w = randn(10, 10) @testset for Opt in [SGD, Nesterov, Momentum, ADAM, AdaMax, RMSProp, ps -> ADAGrad(ps, 0.1), ADADelta, AMSGrad, NADAM] diff --git a/test/utils.jl b/test/utils.jl index 5e1b0ef0..119baaff 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,7 +1,10 @@ -using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian +using Flux +using Flux: throttle, jacobian, initn, glorot_uniform, glorot_normal using StatsBase: std using Dates using Random +using Test +using Dates: now @testset "Throttle" begin @testset "default behaviour" begin From 710a65fe72f22225b4c1ddfbf83647ea38e7135a Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 14:27:56 +0100 Subject: [PATCH 041/121] Fix back scalar with a Ref and fix diagonal test --- src/tracker/back.jl | 2 +- test/tracker.jl | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 774123b4..e5a84a71 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -137,7 +137,7 @@ end function forward(f, args...) args = param.(args) y, back = forward(() -> f(args...), Params(args)) - y, Δ -> getindex.(back(Δ), args) + y, Δ -> getindex.(Ref(back(Δ)), args) end function losscheck(x) diff --git a/test/tracker.jl b/test/tracker.jl index 768cc4f7..6cf4aba8 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -3,23 +3,20 @@ using Flux.Tracker, Test, NNlib using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using NNlib: conv using Printf: @sprintf -using LinearAlgebra: diagm, dot, LowerTriangular, norm +using LinearAlgebra: Diagonal, dot, LowerTriangular, norm using Statistics: mean, std using Random # using StatsBase gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...) 
- @testset "Tracker" begin @test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2) - @test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10)) @test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5)) - @test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5)) @test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) @test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) @@ -36,7 +33,6 @@ gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...) @test gradtest(Flux.crossentropy, rand(5,5), rand(5, 5)) @test gradtest(x -> x', rand(5)) - function promotiontest(f, A, B, C) r0 = f(A, B, C) r1 = f(param(A), B, C) @@ -69,6 +65,7 @@ end @test gradtest(vcatf, rand(5)', rand(2,5)) end + @testset for hcatf in [hcat, cat2] @test gradtest(hcatf, rand(5), rand(5)) @test gradtest(hcatf, rand(5)', rand(5)') @@ -97,7 +94,7 @@ end @test !isa(vcat(rand(2)), TrackedArray) @test !isa(hcat(rand(2)), TrackedArray) - @test !isa(cat(1,rand(2)), TrackedArray) + @test !isa(cat(rand(2), dims=1), TrackedArray) @test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1)) @@ -115,6 +112,7 @@ end promotiontest(hcat, rand(4,3,5), rand(4,1,5), rand(4,2,5)) promotiontest((x...) -> cat(x..., dims = 3), rand(4,5,3), rand(4,5,1), rand(4,5,2)) end + end @test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6)) @@ -128,7 +126,7 @@ end @test gradtest(kron, rand(5,1), rand(3,1), rand(8,1)) @test gradtest(kron, rand(5,2), rand(3,2), rand(8,2)) -@test gradtest(diagm, rand(3)) +@test gradtest(f-> Matrix(Diagonal(f)), rand(3)) @testset "mean" begin @test gradtest(mean, rand(2, 3)) From c8307a06272b66735f01aa4ee21aaadbf1df6389 Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 14:42:33 +0100 Subject: [PATCH 042/121] Use @info for logging --- src/data/mnist.jl | 2 +- src/data/sentiment.jl | 2 +- src/optimise/train.jl | 2 +- test/cuda/cuda.jl | 2 +- test/cuda/cudnn.jl | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/data/mnist.jl b/src/data/mnist.jl index 34bcd50c..3322df84 100644 --- a/src/data/mnist.jl +++ b/src/data/mnist.jl @@ -14,7 +14,7 @@ function load() "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte"] isfile(file) && continue - info("Downloading MNIST dataset") + @info "Downloading MNIST dataset" download("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz") open(file, "w") do io write(io, GZip.open(read, "$file.gz")) diff --git a/src/data/sentiment.jl b/src/data/sentiment.jl index 570fcf5d..a269107f 100644 --- a/src/data/sentiment.jl +++ b/src/data/sentiment.jl @@ -5,7 +5,7 @@ using ..Data: deps function load() isfile(deps("sentiment.zip")) || return - info("Downloading sentiment treebank dataset") + @info "Downloading sentiment treebank dataset" download("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip", deps("sentiment.zip")) end diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 8ad8573e..95009444 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -59,7 +59,7 @@ hello """ macro epochs(n, ex) :(@progress for i = 1:$(esc(n)) - info("Epoch $i") + @info "Epoch $i" $(esc(ex)) end) end diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index f515a2bc..fd860189 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -1,7 +1,7 @@ using Flux, Flux.Tracker, CuArrays, Test 
using Flux: gpu -info("Testing Flux/GPU") +@info "Testing Flux/GPU" @testset "CuArrays" begin diff --git a/test/cuda/cudnn.jl b/test/cuda/cudnn.jl index c67fc060..d5cf442b 100644 --- a/test/cuda/cudnn.jl +++ b/test/cuda/cudnn.jl @@ -1,6 +1,6 @@ using Flux, CuArrays, Test -info("Testing Flux/CUDNN") +@info "Testing Flux/CUDNN" @testset "RNN" begin @testset for R in [RNN, GRU, LSTM] From 59bdff2cae30e1afc75623bf65394da7bea6bfaf Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 14:50:11 +0100 Subject: [PATCH 043/121] Test 0.7 and 1.0 --- .travis.yml | 2 ++ src/data/cmudict.jl | 2 +- test/runtests.jl | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bef19a3e..dfdc0496 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,8 @@ os: # - osx julia: - 0.7 + - 1.0 + - nightly # uncomment the following lines to override the default test script # script: # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi diff --git a/src/data/cmudict.jl b/src/data/cmudict.jl index 3ac0634d..926f2342 100644 --- a/src/data/cmudict.jl +++ b/src/data/cmudict.jl @@ -14,7 +14,7 @@ function load() return end end - info("Downloading CMUDict dataset") + @info "Downloading CMUDict dataset" mkpath(deps("cmudict")) for x in suffixes download("$cache_prefix/http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x", diff --git a/test/runtests.jl b/test/runtests.jl index 15f59459..f751c1f2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ include("layers/stateless.jl") include("optimise.jl") include("data.jl") -if Base.find_in_path("CuArrays") ≠ nothing +if Base.find_package("CuArrays") ≠ nothing include("cuda/cuda.jl") end From 69ccaf044f9d996a393bf4d960e9b7d5ffe02cb4 Mon Sep 17 00:00:00 2001 From: Josh Christie Date: Sat, 11 Aug 2018 15:46:01 +0100 Subject: [PATCH 044/121] Allow failures on nightly --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index dfdc0496..9bf07dd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,9 @@ julia: # script: # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi # - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)' +matrix: + allow_failures: + - julia: nightly after_success: - julia -e 'Pkg.add("Documenter")' - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' From 4683e925d408fa20c99acd435aad58bf6e4ff154 Mon Sep 17 00:00:00 2001 From: ayush1999 Date: Sat, 11 Aug 2018 20:55:14 +0530 Subject: [PATCH 045/121] Final changes --- src/layers/conv.jl | 4 ++-- src/tracker/Tracker.jl | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index e73e582f..78509c84 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,6 +1,6 @@ using NNlib: conv -@generated sub2(::Type{Val{N}}) where N = :(Val{$(N-2)}) +@generated sub2(::Type{Val{N}}) where N = :(Val($(N-2))) expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) @@ -32,7 +32,7 @@ Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, stride = 1, pad = 0, dilation = 1) where N = - Conv(param(init(k..., ch...)), param(zero(ch[2])), σ, + Conv(param(init(k..., ch...)), param(zeros(ch[2])), σ, stride = stride, pad = pad, dilation = dilation) @treelike Conv diff --git a/src/tracker/Tracker.jl b/src/tracker/Tracker.jl index 2c4951a9..190837ab 100644 --- a/src/tracker/Tracker.jl 
+++ b/src/tracker/Tracker.jl @@ -77,8 +77,7 @@ include("numeric.jl") Hook into gradient backpropagation. `x` is unmodified, but when backpropagating `f` will be applied to the incoming gradient. For example, `hook(-, x)` will reverse -the sign of the gradient applied to `x`. -""" +the sign of the gradient applied to `x`.""" hook(f, x) = istracked(x) ? track(hook, f, x) : x @grad hook(f, x) = data(x), Δ -> (nothing, f(Δ)) From 2d80f680871ac2ac1646ba503684072e6fd410bc Mon Sep 17 00:00:00 2001 From: "femtocleaner[bot]" Date: Tue, 14 Aug 2018 16:46:23 +0000 Subject: [PATCH 046/121] Fix deprecations --- src/Flux.jl | 2 -- src/data/sentiment.jl | 6 +++--- src/tracker/idset.jl | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 0989aa9c..768e851c 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -1,5 +1,3 @@ -__precompile__() - module Flux # Zero Flux Given diff --git a/src/data/sentiment.jl b/src/data/sentiment.jl index a269107f..a58cd9d4 100644 --- a/src/data/sentiment.jl +++ b/src/data/sentiment.jl @@ -14,7 +14,7 @@ getfile(r, name) = r.files[findfirst(x -> x.name == name, r.files)] function getfile(name) r = ZipFile.Reader(deps("sentiment.zip")) - text = readstring(getfile(r, "trees/$name")) + text = read(getfile(r, "trees/$name"), String) close(r) return text end @@ -29,12 +29,12 @@ function parsetree(s) s = replace(s, r"\$", s -> "\\\$") s = replace(s, r"[^\s\(\)]+", s -> "\"$s\"") s = replace(s, " ", ", ") - return totree(parse(s)) + return totree(Meta.parse(s)) end function gettrees(name) load() - ss = split(getfile("$name.txt"), '\n', keep = false) + ss = split(getfile("$name.txt"), '\n', keepempty = false) return parsetree.(ss) end diff --git a/src/tracker/idset.jl b/src/tracker/idset.jl index d1c507b4..62570c99 100644 --- a/src/tracker/idset.jl +++ b/src/tracker/idset.jl @@ -11,7 +11,7 @@ Base.push!(s::IdSet{T}, x::T) where T = (s.dict[x] = nothing; s) Base.delete!(s::IdSet{T}, x::T) where T = (delete!(s.dict, x); s) Base.in(x, s::IdSet) = haskey(s.dict, x) -(::Type{IdSet{T}})(xs) where T = push!(IdSet{T}(), xs...) +IdSet{T}(xs) where T = push!(IdSet{T}(), xs...) IdSet(xs) = IdSet{eltype(xs)}(xs) From a43127f8811836a12ef82a72f767d6a71a7b8412 Mon Sep 17 00:00:00 2001 From: Simon Date: Wed, 15 Aug 2018 12:16:12 +0200 Subject: [PATCH 047/121] fix copy_transpose! --- src/cuda/cudnn.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index 585b948d..01a368c7 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -1,6 +1,8 @@ using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle, cudnnDataType, TensorDesc, FilterDesc +using LinearAlgebra + mutable struct DropoutDesc ptr::Ptr{Nothing} states::CuVector{UInt8} @@ -244,14 +246,14 @@ import ..Tracker: TrackedArray using CUDAnative using CuArrays: @cuindex, cudims -function copy_transpose!(dst::CuArray, src::CuArray) +function LinearAlgebra.copy_transpose!(dst::CuArray, src::CuArray) function kernel(dst, src) I = @cuindex dst dst[I...] = src[reverse(I)...] 
return end blk, thr = cudims(dst) - @cuda (blk, thr) kernel(dst, src) + @cuda blocks=blk threads=thr kernel(dst, src) return dst end From 517dc58ce01664c427a2929262e956a63db9417f Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Thu, 16 Aug 2018 18:17:43 -0400 Subject: [PATCH 048/121] properly initialize MNIST array for 1.0 --- src/data/mnist.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/mnist.jl b/src/data/mnist.jl index 3322df84..c068bc7d 100644 --- a/src/data/mnist.jl +++ b/src/data/mnist.jl @@ -49,7 +49,7 @@ function labelheader(io::IO) end function rawimage(io::IO) - img = Array{Gray}(NCOLS, NROWS) + img = Array{Gray}(undef, NCOLS, NROWS) for i in 1:NCOLS, j in 1:NROWS img[i, j] = reinterpret(Colors.N0f8, read(io, UInt8)) end From 995543f64892050824cb4bfc5263be313506848e Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 17 Aug 2018 11:44:01 +0100 Subject: [PATCH 049/121] rm dates --- test/utils.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index 119baaff..2aade669 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,15 +1,13 @@ using Flux using Flux: throttle, jacobian, initn, glorot_uniform, glorot_normal using StatsBase: std -using Dates using Random using Test -using Dates: now @testset "Throttle" begin @testset "default behaviour" begin a = [] - f = throttle(()->push!(a, now()), 1, leading=true, trailing=false) + f = throttle(()->push!(a, time()), 1, leading=true, trailing=false) f() f() f() @@ -19,7 +17,7 @@ using Dates: now @testset "leading behaviour" begin a = [] - f = throttle(()->push!(a, now()), 1, leading=true, trailing=false) + f = throttle(()->push!(a, time()), 1, leading=true, trailing=false) f() @test length(a) == 1 f() @@ -31,7 +29,7 @@ using Dates: now @testset "trailing behaviour" begin a = [] - f = throttle(()->push!(a, now()), 1, leading=false, trailing=true) + f = throttle(()->push!(a, time()), 1, leading=false, trailing=true) f() @test length(a) == 0 f() From 23af487f982ccfa94298c5a55575b6668ce9f80d Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 17 Aug 2018 11:44:07 +0100 Subject: [PATCH 050/121] ignore manifest --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9d6de240..e2cb9ecd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ docs/build/ docs/site/ docs/flux.css deps +Manifest.toml From 24a3bce4954f15a8d4c2895f670a35e7b7bb112e Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 17 Aug 2018 17:46:13 +0530 Subject: [PATCH 051/121] added stop to break training loop --- src/optimise/train.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 95009444..0b54ccfa 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,5 +1,6 @@ using Juno using Flux.Tracker: back! +include("../utls.jl") runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) @@ -33,7 +34,12 @@ The callback can return `:stop` to interrupt the training loop. Multiple optimisers and callbacks can be passed to `opt` and `cb` as arrays. """ function train!(loss, data, opt; cb = () -> ()) - cb = runall(cb) + cb = try: + runall(cb) + catch e + if e isa StopException || rethrow() + @info "Stop Condition Met" + break opt = runall(opt) @progress for d in data l = loss(d...) 
From 8ad72e51ea17f5bb6dd8a15f838289d15f67b890 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 17 Aug 2018 19:33:51 +0530 Subject: [PATCH 052/121] added function to stop training --- src/Flux.jl | 1 + src/utils.jl | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/Flux.jl b/src/Flux.jl index 768e851c..f9268829 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -24,6 +24,7 @@ export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM include("utils.jl") +# export stop include("onehot.jl") include("treelike.jl") diff --git a/src/utils.jl b/src/utils.jl index c53f7864..1e119148 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -153,3 +153,9 @@ function jacobian(m,x) end J' end + +struct StopException <: Exception end + +function stop() + throw(StopException) +end \ No newline at end of file From 0524964400046631e60ad79672a6fa89537f3dbe Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 17 Aug 2018 19:40:48 +0530 Subject: [PATCH 053/121] fixed typo --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 0b54ccfa..b2631b23 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,6 +1,6 @@ using Juno using Flux.Tracker: back! -include("../utls.jl") +include("../utils.jl") runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) From 999b00b64da0dbb410811686a79aeedc8e60d62b Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 17 Aug 2018 19:45:10 +0530 Subject: [PATCH 054/121] fixed typo --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index b2631b23..a3af8d66 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -34,7 +34,7 @@ The callback can return `:stop` to interrupt the training loop. Multiple optimisers and callbacks can be passed to `opt` and `cb` as arrays. """ function train!(loss, data, opt; cb = () -> ()) - cb = try: + cb = try runall(cb) catch e if e isa StopException || rethrow() From 65a5ecccd2823d011f5ba7a0375fe465327e4fa4 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sat, 18 Aug 2018 08:24:49 +0530 Subject: [PATCH 055/121] returning --- src/optimise/train.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index a3af8d66..c7899b08 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -37,9 +37,9 @@ function train!(loss, data, opt; cb = () -> ()) cb = try runall(cb) catch e - if e isa StopException || rethrow() + if e isa StopException || rethrow(e) @info "Stop Condition Met" - break + :stop opt = runall(opt) @progress for d in data l = loss(d...) From 887bfad312467c67e3ba894f771baf72c15c87cb Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sat, 18 Aug 2018 08:28:47 +0530 Subject: [PATCH 056/121] returning :stop --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index c7899b08..3bf06d8f 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -39,7 +39,7 @@ function train!(loss, data, opt; cb = () -> ()) catch e if e isa StopException || rethrow(e) @info "Stop Condition Met" - :stop + return :stop opt = runall(opt) @progress for d in data l = loss(d...) 
From 3f42301e077c96b1a263da65f628794a98035c5b Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 18 Aug 2018 11:50:52 -0400 Subject: [PATCH 057/121] recurrent bug fixes --- src/layers/recurrent.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index d97c7fd7..d9c51127 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -1,4 +1,4 @@ -gate(h, n) = (1:h) + h*(n-1) +gate(h, n) = (1:h) .+ h*(n-1) gate(x::AbstractVector, h, n) = x[gate(h,n)] gate(x::AbstractMatrix, h, n) = x[gate(h,n),:] @@ -122,9 +122,9 @@ end function LSTMCell(in::Integer, out::Integer; init = glorot_uniform) - cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zero(out*4)), + cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zeros(out*4)), param(initn(out)), param(initn(out))) - cell.b.data[gate(out, 2)] = 1 + cell.b.data[gate(out, 2)] .= 1 return cell end @@ -170,7 +170,7 @@ end GRUCell(in, out; init = glorot_uniform) = GRUCell(param(init(out*3, in)), param(init(out*3, out)), - param(zero(out*3)), param(initn(out))) + param(zeros(out*3)), param(initn(out))) function (m::GRUCell)(h, x) b, o = m.b, size(h, 1) From f2021d41acd91bd5a2cc6644927367c8eab76ecf Mon Sep 17 00:00:00 2001 From: Dominique Luna Date: Sat, 18 Aug 2018 14:18:50 -0400 Subject: [PATCH 058/121] initn -> init --- src/layers/recurrent.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index d9c51127..4064ed7b 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -84,7 +84,7 @@ end RNNCell(in::Integer, out::Integer, σ = tanh; init = glorot_uniform) = RNNCell(σ, param(init(out, in)), param(init(out, out)), - param(zeros(out)), param(initn(out))) + param(zeros(out)), param(init(out))) function (m::RNNCell)(h, x) σ, Wi, Wh, b = m.σ, m.Wi, m.Wh, m.b @@ -123,7 +123,7 @@ end function LSTMCell(in::Integer, out::Integer; init = glorot_uniform) cell = LSTMCell(param(init(out*4, in)), param(init(out*4, out)), param(zeros(out*4)), - param(initn(out)), param(initn(out))) + param(init(out)), param(init(out))) cell.b.data[gate(out, 2)] .= 1 return cell end @@ -170,7 +170,7 @@ end GRUCell(in, out; init = glorot_uniform) = GRUCell(param(init(out*3, in)), param(init(out*3, out)), - param(zeros(out*3)), param(initn(out))) + param(zeros(out*3)), param(init(out))) function (m::GRUCell)(h, x) b, o = m.b, size(h, 1) From 2aa057ec0820ed65ad5926aa376233badc045de1 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 14:54:54 +0530 Subject: [PATCH 059/121] fixed throwing exception --- src/optimise/train.jl | 4 ++-- src/utils.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 3bf06d8f..2cc20268 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -36,8 +36,8 @@ Multiple optimisers and callbacks can be passed to `opt` and `cb` as arrays. 
function train!(loss, data, opt; cb = () -> ()) cb = try runall(cb) - catch e - if e isa StopException || rethrow(e) + catch ex + if ex isa StopException || rethrow(ex) @info "Stop Condition Met" return :stop opt = runall(opt) diff --git a/src/utils.jl b/src/utils.jl index 1e119148..c746f391 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -157,5 +157,5 @@ end struct StopException <: Exception end function stop() - throw(StopException) + throw(StopException()) end \ No newline at end of file From 8229c8e0453dab07bb5b83bed7a1d2e2169b3ed4 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 15:17:07 +0530 Subject: [PATCH 060/121] modified training loop --- src/optimise/train.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 2cc20268..1928a80d 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -34,18 +34,22 @@ The callback can return `:stop` to interrupt the training loop. Multiple optimisers and callbacks can be passed to `opt` and `cb` as arrays. """ function train!(loss, data, opt; cb = () -> ()) - cb = try - runall(cb) - catch ex - if ex isa StopException || rethrow(ex) - @info "Stop Condition Met" - return :stop + cb = runall(cb) opt = runall(opt) @progress for d in data l = loss(d...) @interrupts back!(l) opt() - cb() == :stop && break + try + cb() + catch ex + if ex isa StopException + @info "Stop condition met" + break + else + rethrow(ex) + end + end end From fbd82a692536da72e33a8d2397febdfd4f74d43c Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 15:19:45 +0530 Subject: [PATCH 061/121] added end --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 1928a80d..92208bc1 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -49,7 +49,7 @@ function train!(loss, data, opt; cb = () -> ()) else rethrow(ex) end - + end end end From a53a5c8350a896ff906b8790e81152243e317adc Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 15:31:33 +0530 Subject: [PATCH 062/121] exporting stop --- src/Flux.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Flux.jl b/src/Flux.jl index f9268829..ab45b630 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -24,7 +24,7 @@ export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM include("utils.jl") -# export stop +export stop include("onehot.jl") include("treelike.jl") From b0f83f93ff15454af659b187de79c89bc46bd774 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 16:41:13 +0530 Subject: [PATCH 063/121] exported StopException --- src/Flux.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Flux.jl b/src/Flux.jl index ab45b630..e406b5c6 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -24,7 +24,7 @@ export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM include("utils.jl") -export stop +export stop, StopException include("onehot.jl") include("treelike.jl") From 5c42c8689cfe8228a7a029b3fae62c2530890ace Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 17:04:31 +0530 Subject: [PATCH 064/121] printing expception --- src/optimise/train.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 92208bc1..0663b9d4 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -47,6 +47,7 @@ function train!(loss, data, 
opt; cb = () -> ()) @info "Stop condition met" break else + println(ex) rethrow(ex) end end From 257e2a7d2e518c4375faaead09623674bd961a36 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 17:11:11 +0530 Subject: [PATCH 065/121] checking exception --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 0663b9d4..12a1a272 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -43,7 +43,7 @@ function train!(loss, data, opt; cb = () -> ()) try cb() catch ex - if ex isa StopException + if ex isa StopException() @info "Stop condition met" break else From 9c98272cf03f91a8f6f6f4ae9fd26cd24df44061 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 17:38:00 +0530 Subject: [PATCH 066/121] catching exception --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 12a1a272..0663b9d4 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -43,7 +43,7 @@ function train!(loss, data, opt; cb = () -> ()) try cb() catch ex - if ex isa StopException() + if ex isa StopException @info "Stop condition met" break else From 1228e9c5e2e627a6d0a26390ddfe29d9f91f89e4 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 19 Aug 2018 22:55:14 +0530 Subject: [PATCH 067/121] removed include statement --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 0663b9d4..3f26fdbd 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,6 +1,6 @@ using Juno using Flux.Tracker: back! -include("../utils.jl") +# include("../utils.jl") runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) From e239eb110547f029c9e59b7de1a81b741151f7d7 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 13:30:05 +0530 Subject: [PATCH 068/121] properly importing functions --- src/Flux.jl | 4 ++-- src/optimise/train.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index e406b5c6..c01dbd4e 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -7,7 +7,7 @@ using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, Dropout, LayerNorm, BatchNorm, - params, mapleaves, cpu, gpu + params, mapleaves, cpu, gpu, stop, StopException @reexport using NNlib using NNlib: @fix @@ -24,7 +24,6 @@ export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM include("utils.jl") -export stop, StopException include("onehot.jl") include("treelike.jl") @@ -39,3 +38,4 @@ include("data/Data.jl") @init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("cuda/cuda.jl") end # module + diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 3f26fdbd..4b1e205b 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,6 +1,6 @@ using Juno using Flux.Tracker: back! 
-# include("../utils.jl") +import Flux: stop, StopException runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) From 06aad375fc994b085b1e2fbe46078b25d3d2083a Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 13:35:55 +0530 Subject: [PATCH 069/121] properly importing functions --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 4b1e205b..0a06492c 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,6 +1,6 @@ using Juno using Flux.Tracker: back! -import Flux: stop, StopException +using Flux: stop, StopException runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) From 394b4167cedb2a2175721ae49a805734e5c05f74 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 13:43:08 +0530 Subject: [PATCH 070/121] moving stop to Optimise --- src/Flux.jl | 4 ++-- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 8 +++++++- src/utils.jl | 10 ++++++---- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index c01dbd4e..cd407705 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -7,7 +7,7 @@ using MacroTools: @forward export Chain, Dense, RNN, LSTM, GRU, Conv, Dropout, LayerNorm, BatchNorm, - params, mapleaves, cpu, gpu, stop, StopException + params, mapleaves, cpu, gpu @reexport using NNlib using NNlib: @fix @@ -21,7 +21,7 @@ include("optimise/Optimise.jl") using .Optimise using .Optimise: @epochs export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop, StopException include("utils.jl") include("onehot.jl") diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index ee7723bc..c4828c9e 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -2,7 +2,7 @@ module Optimise export train!, SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop, StopException struct Param{T} x::T diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 0a06492c..341e6b43 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,6 +1,5 @@ using Juno using Flux.Tracker: back! 
-using Flux: stop, StopException runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) @@ -15,6 +14,13 @@ macro interrupts(ex) end) end +struct StopException <: Exception + x::Symbol +end + +function stop(x) + throw(StopException( + """ train!(loss, data, opt) diff --git a/src/utils.jl b/src/utils.jl index c746f391..321e0d94 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -154,8 +154,10 @@ function jacobian(m,x) J' end -struct StopException <: Exception end +# struct StopException <: Exception +# x::Symbol +# end -function stop() - throw(StopException()) -end \ No newline at end of file +# function stop(x) +# throw(StopException(x)) +# end \ No newline at end of file From 06db6ed314c588443cab1659f92599ac01da0114 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 13:48:28 +0530 Subject: [PATCH 071/121] housekeeping: fixing typo --- src/optimise/train.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 341e6b43..c57f093f 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -19,7 +19,8 @@ struct StopException <: Exception end function stop(x) - throw(StopException( + throw(StopException()) +end """ train!(loss, data, opt) From df22bc5c8f910cb7fa8bb29d7fdc0ff993e8defb Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 14:02:09 +0530 Subject: [PATCH 072/121] removed argument from stop function --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index c57f093f..fbfb5670 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -18,7 +18,7 @@ struct StopException <: Exception x::Symbol end -function stop(x) +function stop() throw(StopException()) end From 51578177a5382ec015ace5b5085e3ef433238f54 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 14:08:23 +0530 Subject: [PATCH 073/121] removed arguments from StopException --- src/optimise/train.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index fbfb5670..2fc651bf 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -14,9 +14,7 @@ macro interrupts(ex) end) end -struct StopException <: Exception - x::Symbol -end +struct StopException <: Exception end function stop() throw(StopException()) From 756207e782d1f56c1defcd0061a268c889e2fd93 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 14:20:33 +0530 Subject: [PATCH 074/121] added docs --- src/optimise/train.jl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 2fc651bf..c84a8191 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -15,7 +15,20 @@ macro interrupts(ex) end struct StopException <: Exception end +""" + stop() +Call `stop()` in a callback to indicate when a callback condition is met. +This would trigger the train loop to stop and exit. 
+ +```julia +# Example callback: + +cb = function () + accuracy() > 0.9 && stop() +end +``` +""" function stop() throw(StopException()) end @@ -52,7 +65,6 @@ function train!(loss, data, opt; cb = () -> ()) @info "Stop condition met" break else - println(ex) rethrow(ex) end end From 624dc6cb85619caee85f879b24d319aeab7c6d78 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 14:20:33 +0530 Subject: [PATCH 075/121] changed training loop test --- test/optimise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/optimise.jl b/test/optimise.jl index 7e99c294..502d9ab2 100644 --- a/test/optimise.jl +++ b/test/optimise.jl @@ -23,7 +23,7 @@ end Flux.train!(() -> (sleep(0.1); i += 1; l), Iterators.repeated((), 100), ()->(), - cb = Flux.throttle(() -> (i > 3 && :stop), 1)) + cb = Flux.throttle(() -> (i > 3 && stop()), 1)) @test 3 < i < 50 end From 5a023a9ccc33048485f0f98af60706d3ceac1e25 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 20 Aug 2018 13:08:04 +0100 Subject: [PATCH 076/121] WIP 1.0 support closes #353 --- src/Flux.jl | 1 - src/cuda/cuda.jl | 2 +- src/cuda/cudnn.jl | 12 ++++++------ src/layers/basic.jl | 2 +- src/layers/stateless.jl | 2 +- src/onehot.jl | 5 +++-- src/tracker/array.jl | 29 +++++++++++++++++++++++++++++ test/cuda/cuda.jl | 1 + test/runtests.jl | 5 ++++- 9 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 768e851c..614eeaf7 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -10,7 +10,6 @@ export Chain, Dense, RNN, LSTM, GRU, Conv, params, mapleaves, cpu, gpu @reexport using NNlib -using NNlib: @fix include("tracker/Tracker.jl") using .Tracker diff --git a/src/cuda/cuda.jl b/src/cuda/cuda.jl index eaa3fe00..fe36bf5d 100644 --- a/src/cuda/cuda.jl +++ b/src/cuda/cuda.jl @@ -1,6 +1,6 @@ module CUDA -using CuArrays +using ..CuArrays CuArrays.cudnn_available() && include("cudnn.jl") diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index 01a368c7..fab67bac 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -1,7 +1,7 @@ -using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle, +using .CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle, cudnnDataType, TensorDesc, FilterDesc -using LinearAlgebra +using LinearAlgebra mutable struct DropoutDesc ptr::Ptr{Nothing} @@ -243,8 +243,8 @@ end import ..Flux: Flux, relu import ..Tracker: TrackedArray -using CUDAnative -using CuArrays: @cuindex, cudims +using .CuArrays.CUDAnative +using .CuArrays: @cuindex, cudims function LinearAlgebra.copy_transpose!(dst::CuArray, src::CuArray) function kernel(dst, src) @@ -326,7 +326,7 @@ end h_ = hBatch(x, data(h)) dx, dh = backwardData(descs[m], y, dy, dho, h_, reserve) (dWi, dWh), db = backwardWeights(descs[m], data(x), h_, y, reserve) - nobacksies(:RNN, (dx, unbroadcast(size(h), dh), dWi.', dWh.', db)) + nobacksies(:RNN, (dx, unbroadcast(size(h), dh), transpose(dWi), transpose(dWh), db)) end end @@ -341,6 +341,6 @@ end (dWi, dWh), db = backwardWeights(descs[m], data(x), h_, y, reserve) nobacksies(:RNN, (dx, unbroadcast(size(h), dh), unbroadcast(size(c), dc), - dWi.', dWh.', db)) + transpose(dWi), transpose(dWh), db)) end end diff --git a/src/layers/basic.jl b/src/layers/basic.jl index f7344484..5e5a88bd 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -77,7 +77,7 @@ end function (a::Dense)(x) W, b, σ = a.W, a.b, a.σ - @fix σ.(W*x .+ b) + σ.(W*x .+ b) end function Base.show(io::IO, l::Dense) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 
ba80e8a6..6d040f4f 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -5,7 +5,7 @@ using NNlib: logsoftmax, logσ mse(ŷ, y) = sum((ŷ .- y).^2)/length(y) function crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1) - @fix -sum(y .* log.(ŷ) .* weight) / size(y, 2) + -sum(y .* log.(ŷ) .* weight) / size(y, 2) end @deprecate logloss(x, y) crossentropy(x, y) diff --git a/src/onehot.jl b/src/onehot.jl index dfc632a1..365da63d 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -33,8 +33,9 @@ import Adapt.adapt adapt(T, xs::OneHotMatrix) = OneHotMatrix(xs.height, adapt(T, xs.data)) @init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" begin - import CuArrays: CuArray, cudaconvert - Base.Broadcast._containertype(::Type{<:OneHotMatrix{<:CuArray}}) = CuArray + import .CuArrays: CuArray, cudaconvert + import Base.Broadcast: BroadcastStyle, ArrayStyle + BroadcastStyle(::Type{<:OneHotMatrix{<:CuArray}}) = ArrayStyle{CuArray}() cudaconvert(x::OneHotMatrix{<:CuArray}) = OneHotMatrix(x.height, cudaconvert(x.data)) end diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 20568086..e9fa1a1b 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -381,3 +381,32 @@ function Base.copy(bc::Broadcast.Broadcasted{TrackedStyle}) bc = Broadcast.flatten(bc) ∇broadcast(bc.f, bc.args...) end + +using Requires + +# https://github.com/FluxML/Flux.jl/issues/353 +@init @eval Base.Broadcast begin + function flatten(bc::Broadcasted{Style}) where {Style} + isflat(bc) && return bc + args = cat_nested(bc) + let makeargs = make_makeargs(bc), f = bc.f + newf = @inline function(args::Vararg{Any,N}) where N + f(makeargs(args...)...) + end + return Broadcasted{Style}(newf, args, bc.axes) + end + end + @inline function make_makeargs(makeargs, t::Tuple{<:Broadcasted,Vararg{Any}}) + bc = t[1] + let makeargs = make_makeargs(makeargs, tail(t)), f = bc.f + let makeargs = make_makeargs(makeargs, bc.args) + headargs, tailargs = make_headargs(bc.args), make_tailargs(bc.args) + return @inline function(args::Vararg{Any,N}) where N + args1 = makeargs(args...) + a, b = headargs(args1...), tailargs(args1...) + (f(a...), b...) 
+ end + end + end + end +end diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index fd860189..f0512b72 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -14,6 +14,7 @@ cx = gpu(x) x = Flux.onehotbatch([1, 2, 3], 1:3) cx = gpu(x) @test cx isa Flux.OneHotMatrix && cx.data isa CuArray +@test (cx .+ 1) isa CuArray m = Chain(Dense(10, 5, tanh), Dense(5, 2), softmax) cm = gpu(m) diff --git a/test/runtests.jl b/test/runtests.jl index f751c1f2..04333d0d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,9 @@ using Random Random.seed!(0) +# So we can use the system CuArrays +insert!(LOAD_PATH, 2, "@v#.#") + @testset "Flux" begin include("utils.jl") @@ -12,7 +15,7 @@ include("layers/stateless.jl") include("optimise.jl") include("data.jl") -if Base.find_package("CuArrays") ≠ nothing +if Base.find_package("CuArrays") != nothing include("cuda/cuda.jl") end From 1af7a53e1f9ba096e4cd2acc71b80576eb1d86d1 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 20 Aug 2018 18:10:20 +0530 Subject: [PATCH 077/121] housekeeping: removed commented code --- src/utils.jl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 321e0d94..c6bbdb13 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -152,12 +152,4 @@ function jacobian(m,x) xp.grad .*= 0 # Reset gradient accumulator end J' -end - -# struct StopException <: Exception -# x::Symbol -# end - -# function stop(x) -# throw(StopException(x)) -# end \ No newline at end of file +end \ No newline at end of file From 1115eda6af39cfd87862f991c50e63f3d34586e2 Mon Sep 17 00:00:00 2001 From: pshashk Date: Mon, 20 Aug 2018 16:11:56 +0300 Subject: [PATCH 078/121] repeat fix ERROR: UndefVarError: A not defined --- src/tracker/array.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 20568086..fb1e1fff 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -86,9 +86,9 @@ Base.adjoint(xs::TrackedArray) = track(adjoint, xs) @grad transpose(xs) = transpose(data(xs)), Δ -> (reshape(transpose(Δ), size(xs)),) @grad adjoint(xs) = data(xs)', Δ -> (reshape(Δ', size(xs)),) -Base.repeat(A::TrackedArray; kw...) = track(repeat, A; kw...) +Base.repeat(xs::TrackedArray; kw...) = track(repeat, xs; kw...) -@grad function repeat(xs; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A))) +@grad function repeat(xs; inner=ntuple(x->1, ndims(xs)), outer=ntuple(x->1, ndims(xs))) repeat(data(xs), inner = inner, outer = outer), function (Δ) Δ′ = zero(xs) S = size(xs) From e68b8765b67476ed11316d2d88f416d23e222535 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 20 Aug 2018 14:41:46 +0100 Subject: [PATCH 079/121] broadcast fixes --- src/tracker/array.jl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index e9fa1a1b..559891da 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -370,16 +370,26 @@ function ∇broadcast(f, args::Vararg{Any,N}) where N track(Call(back, tracker.(args)), y) end -using Base.Broadcast: BroadcastStyle +using Base.Broadcast: BroadcastStyle, ArrayStyle, Broadcasted, broadcasted struct TrackedStyle <: BroadcastStyle end Broadcast.BroadcastStyle(::Type{<:Union{TrackedArray,TrackedReal}}) = TrackedStyle() Broadcast.BroadcastStyle(::TrackedStyle, ::BroadcastStyle) = TrackedStyle() -function Base.copy(bc::Broadcast.Broadcasted{TrackedStyle}) - bc = Broadcast.flatten(bc) - ∇broadcast(bc.f, bc.args...) 
+# We have to re-build the original broadcast struct to get the appropriate array +# style. We need this primarily to support CuArrays' broadcasting fixes. +broadcast_rebuild(xs) = data(xs) + +broadcast_rebuild(bc::Broadcasted) = + broadcasted(bc.f, broadcast_rebuild.(bc.args)...) + +preprocess(x) = x + +function Base.Broadcast.materialize(bc::Broadcasted{TrackedStyle}) + bc1 = Broadcast.flatten(bc) + bc2 = Broadcast.flatten(broadcast_rebuild(bc)) + ∇broadcast(bc2.f, bc1.args...) end using Requires From 3cfecaa4dbcc525ebfed3b330e8ceca7b8d7c92a Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 20 Aug 2018 15:38:25 +0100 Subject: [PATCH 080/121] test cleanup --- src/cuda/cudnn.jl | 14 ++++++++------ test/cuda/cuda.jl | 7 +++---- test/runtests.jl | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index fab67bac..f033595a 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -19,8 +19,9 @@ function DropoutDesc(ρ::Real; seed::Integer=0) desc = DropoutDesc(d[], states) @check ccall((:cudnnSetDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},Ptr{Nothing},Cfloat,Ptr{Nothing},Csize_t,Culonglong), desc,libcudnn_handle[],ρ,states,length(states),seed) - finalizer(desc, x -> - @check ccall((:cudnnDestroyDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x)) + finalizer(desc) do x + @check ccall((:cudnnDestroyDropoutDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x) + end return desc end @@ -45,10 +46,10 @@ const RNN_ALGO_PERSIST_DYNAMIC = 2 # LSTM: [weight, bias] × [input, hidden] × [input, forget, newmem, output] function params(w::CuVector, input, hidden, n = 1) - slice(offset, shape) = reshape(w[offset+(1:prod(shape))], shape) + slice(offset, shape) = reshape(w[offset.+(1:prod(shape))], shape) wx = slice(0, (input, hidden*n)) wh = slice(length(wx), (hidden, hidden*n)) - bias = w[length(wx)+length(wh) + (1:hidden*n)] + bias = w[length(wx)+length(wh) .+ (1:hidden*n)] (wx, wh), bias end @@ -88,8 +89,9 @@ function RNNDesc{T}(mode::Int, input::Int, hidden::Int; layers = 1) where T w = cuzeros(T, rnnParamSize(T, d[], input)) # TODO: avoid reserve allocation here rd = RNNDesc{T}(mode, input, hidden, w, params(w, input, hidden, ngates(mode))..., d[]) - finalizer(rd, x -> - @check ccall((:cudnnDestroyRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x)) + finalizer(rd) do x + @check ccall((:cudnnDestroyRNNDescriptor,libcudnn),cudnnStatus_t,(Ptr{Nothing},),x) + end return rd end diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index f0512b72..576c0f5d 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -26,10 +26,9 @@ x = [1,2,3] cx = gpu(x) @test Flux.crossentropy(x,x) ≈ Flux.crossentropy(cx,cx) -# Fails in Pkg.test ffs -# c = gpu(Conv((2,2),3=>4)) -# l = c(gpu(rand(10,10,3,2))) -# Flux.back!(sum(l)) +c = gpu(Conv((2,2),3=>4)) +l = c(gpu(rand(10,10,3,2))) +Flux.back!(sum(l)) end diff --git a/test/runtests.jl b/test/runtests.jl index 04333d0d..a6230f45 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,18 @@ +# Pkg.test runs with --check_bounds=1, forcing all bounds checks. +# This is incompatible with CUDAnative (see JuliaGPU/CUDAnative.jl#98) +if Base.JLOptions().check_bounds == 1 + file = @__FILE__ + run(``` + $(Base.julia_cmd()) + --color=$(Base.have_color ? "yes" : "no") + --compiled-modules=$(Bool(Base.JLOptions().use_compiled_modules) ? "yes" : "no") + --startup-file=$(Base.JLOptions().startupfile != 2 ? 
"yes" : "no") + --code-coverage=$(["none", "user", "all"][1+Base.JLOptions().code_coverage]) + $(file) + ```) + exit() +end + using Flux, Test, Random using Random From 216d278e7ba113393c3aa733fa882c91541e9d21 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Mon, 20 Aug 2018 16:57:43 +0100 Subject: [PATCH 081/121] fix mnist loader --- src/data/Data.jl | 2 ++ src/data/mnist.jl | 2 +- test/data.jl | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/data/Data.jl b/src/data/Data.jl index a4f152c3..d5b5f38d 100644 --- a/src/data/Data.jl +++ b/src/data/Data.jl @@ -11,6 +11,8 @@ function __init__() end include("mnist.jl") +export MNIST + include("cmudict.jl") using .CMUDict diff --git a/src/data/mnist.jl b/src/data/mnist.jl index 3322df84..c068bc7d 100644 --- a/src/data/mnist.jl +++ b/src/data/mnist.jl @@ -49,7 +49,7 @@ function labelheader(io::IO) end function rawimage(io::IO) - img = Array{Gray}(NCOLS, NROWS) + img = Array{Gray}(undef, NCOLS, NROWS) for i in 1:NCOLS, j in 1:NROWS img[i, j] = reinterpret(Colors.N0f8, read(io, UInt8)) end diff --git a/test/data.jl b/test/data.jl index e3c3de7a..7a27c651 100644 --- a/test/data.jl +++ b/test/data.jl @@ -6,3 +6,6 @@ using Test @test length(CMUDict.phones()) == 39 @test length(CMUDict.symbols()) == 84 + +@test MNIST.images()[1] isa Matrix +@test MNIST.labels() isa Vector{Int64} From 616ed194df5e049290aee71ddf1767cdcf3f20f2 Mon Sep 17 00:00:00 2001 From: boathit Date: Tue, 21 Aug 2018 11:29:57 +0800 Subject: [PATCH 082/121] fix argmax and add test --- src/onehot.jl | 10 ++++++---- test/onehot.jl | 13 +++++++++++++ test/runtests.jl | 1 + 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 test/onehot.jl diff --git a/src/onehot.jl b/src/onehot.jl index 365da63d..aa19e4d4 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -54,11 +54,13 @@ end onehotbatch(ls, labels, unk...) = OneHotMatrix(length(labels), [onehot(l, labels, unk...) for l in ls]) -argmax(y::AbstractVector, labels = 1:length(y)) = - labels[findfirst(y, maximum(y))] +import Base:argmax -argmax(y::AbstractMatrix, l...) = - squeeze(mapslices(y -> argmax(y, l...), y, 1), 1) +argmax(y::AbstractVector, labels) = + labels[something(findfirst(isequal(maximum(y)), y), 0)] + +argmax(y::AbstractMatrix, labels) = + dropdims(mapslices(y -> argmax(y, labels), y, dims=1), dims=1) # Ambiguity hack diff --git a/test/onehot.jl b/test/onehot.jl new file mode 100644 index 00000000..28738f60 --- /dev/null +++ b/test/onehot.jl @@ -0,0 +1,13 @@ +using Flux:argmax +using Test + +@testset "argmax" begin + a = [1, 2, 5, 3.] 
+ A = [1 20 5; 2 7 6; 3 9 10; 2 1 14] + labels = ['A', 'B', 'C', 'D'] + + @test argmax(a) == 3 + @test argmax(A) == CartesianIndex(1, 2) + @test argmax(a, labels) == 'C' + @test argmax(A, labels) == ['C', 'A', 'D'] +end diff --git a/test/runtests.jl b/test/runtests.jl index a6230f45..fd48e547 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,6 +24,7 @@ insert!(LOAD_PATH, 2, "@v#.#") @testset "Flux" begin include("utils.jl") +include("onehot.jl") include("tracker.jl") include("layers/normalisation.jl") include("layers/stateless.jl") From ed044e2df78a67c8ce647ac8e09eea831d9462ae Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 21 Aug 2018 23:22:20 +0530 Subject: [PATCH 083/121] changes as requested --- src/Flux.jl | 2 +- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index cd407705..525b33c4 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -21,7 +21,7 @@ include("optimise/Optimise.jl") using .Optimise using .Optimise: @epochs export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop, StopException + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop include("utils.jl") include("onehot.jl") diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index c4828c9e..ee7723bc 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -2,7 +2,7 @@ module Optimise export train!, SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop, StopException + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM struct Param{T} x::T diff --git a/src/optimise/train.jl b/src/optimise/train.jl index c84a8191..3ec3eb18 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -55,14 +55,13 @@ function train!(loss, data, opt; cb = () -> ()) cb = runall(cb) opt = runall(opt) @progress for d in data - l = loss(d...) - @interrupts back!(l) - opt() try - cb() + l = loss(d...) + @interrupts back!(l) + opt() + cb() == :stop && break catch ex if ex isa StopException - @info "Stop condition met" break else rethrow(ex) From 3d11322d378250402d9ed755dc8b298b39d1f516 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 22 Aug 2018 00:29:07 +0530 Subject: [PATCH 084/121] fixed docstring and not exporting stop --- src/Flux.jl | 3 ++- src/optimise/train.jl | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 525b33c4..18195901 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -21,7 +21,8 @@ include("optimise/Optimise.jl") using .Optimise using .Optimise: @epochs export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM + include("utils.jl") include("onehot.jl") diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 3ec3eb18..007613b6 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -18,7 +18,7 @@ struct StopException <: Exception end """ stop() -Call `stop()` in a callback to indicate when a callback condition is met. +Call `Flux.stop()` in a callback to indicate when a callback condition is met. This would trigger the train loop to stop and exit. 
```julia @@ -89,4 +89,4 @@ macro epochs(n, ex) @info "Epoch $i" $(esc(ex)) end) -end +end \ No newline at end of file From a7ad620f011b78ddddb975ac61472fa7bf3d54d5 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 22 Aug 2018 00:33:30 +0530 Subject: [PATCH 085/121] exporting stop --- src/optimise/Optimise.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index ee7723bc..c4828c9e 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -2,7 +2,7 @@ module Optimise export train!, SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, - RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM + RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM, stop, StopException struct Param{T} x::T From 2f1a9847fae66519ff903a051115d088e8a5fdf8 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 22 Aug 2018 21:25:26 +0530 Subject: [PATCH 086/121] deprecate :stop from optimizers; housekeeping --- src/Flux.jl | 4 +--- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 18195901..e0b40941 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -23,7 +23,6 @@ using .Optimise: @epochs export SGD, ADAM, ADAMW, AdaMax, Momentum, Nesterov, RMSProp, ADAGrad, ADADelta, AMSGrad, NADAM - include("utils.jl") include("onehot.jl") include("treelike.jl") @@ -38,5 +37,4 @@ include("data/Data.jl") @init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("cuda/cuda.jl") -end # module - +end # module \ No newline at end of file diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index c4828c9e..9c7fd605 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -20,4 +20,4 @@ using Flux.Tracker: TrackedArray Param(x::TrackedArray) = Param(x.data, x.grad) # Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) -end +end \ No newline at end of file diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 007613b6..61eb1258 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -60,6 +60,7 @@ function train!(loss, data, opt; cb = () -> ()) @interrupts back!(l) opt() cb() == :stop && break + @deprecate :stop Flux.stop() catch ex if ex isa StopException break From 5dca80bd680d118abd9b68c1a6ddd9edf94da577 Mon Sep 17 00:00:00 2001 From: boathit Date: Thu, 23 Aug 2018 13:17:58 +0800 Subject: [PATCH 087/121] fix argmax and batch deprecations --- src/utils.jl | 2 +- test/utils.jl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index c53f7864..0ca38d14 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -24,7 +24,7 @@ julia> chunk(1:10, 3) """ chunk(xs, n) = collect(Iterators.partition(xs, ceil(Int, length(xs)/n))) -batchindex(xs, i) = (reverse(Base.tail(reverse(indices(xs))))..., i) +batchindex(xs, i) = (reverse(Base.tail(reverse(axes(xs))))..., i) """ frequencies(xs) diff --git a/test/utils.jl b/test/utils.jl index 2aade669..5abd0d96 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -90,3 +90,9 @@ end m = RNN(10, 5) @test size.(params(m)) == [(5, 10), (5, 5), (5,), (5,)] end + +@testset "batch" begin + xs = [[1,2,3],[4,5,6]] + X = [1 4; 2 5; 3 6] + @test batch(xs) == X +end From 33c901c19103d8415a41c6e06201a5711cd7a336 Mon Sep 17 00:00:00 2001 From: boathit Date: Thu, 23 Aug 2018 16:01:42 +0800 Subject: [PATCH 088/121] redo --- src/utils.jl | 2 +- test/utils.jl | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/utils.jl b/src/utils.jl 
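The `stop`/`StopException` patches above converge on exception-based early stopping for `train!`: a callback calls `Flux.stop()`, `train!` catches the resulting `StopException` and leaves the loop, and the older `return :stop` convention is deprecated. A minimal sketch of how that is intended to be used once this series is applied — the data, model, optimiser, and the `0.05` threshold below are hypothetical placeholders for illustration, not taken from the patches:

```julia
using Flux

# Hypothetical toy problem — model, data and optimiser are illustrative only.
X, Y = rand(10, 100), rand(2, 100)
m = Chain(Dense(10, 5, relu), Dense(5, 2))
loss(x, y) = Flux.mse(m(x), y)
opt = SGD(params(m), 0.1)   # pre-1.0 optimiser API: parameters are bound up front

# Calling Flux.stop() throws StopException, which train! catches and uses to
# end training early, instead of the deprecated `cb() == :stop` convention.
cb = Flux.throttle(1) do
  current = Flux.Tracker.data(loss(X, Y))
  @info "current loss" current
  current < 0.05 && Flux.stop()
end

Flux.train!(loss, Iterators.repeated((X, Y), 1_000), opt, cb = cb)
```

Nothing in this sketch is required by the patches beyond `Flux.stop()` itself; `throttle` is only used so the check does not run on every batch.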
index 0ca38d14..c53f7864 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -24,7 +24,7 @@ julia> chunk(1:10, 3) """ chunk(xs, n) = collect(Iterators.partition(xs, ceil(Int, length(xs)/n))) -batchindex(xs, i) = (reverse(Base.tail(reverse(axes(xs))))..., i) +batchindex(xs, i) = (reverse(Base.tail(reverse(indices(xs))))..., i) """ frequencies(xs) diff --git a/test/utils.jl b/test/utils.jl index 5abd0d96..2aade669 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -90,9 +90,3 @@ end m = RNN(10, 5) @test size.(params(m)) == [(5, 10), (5, 5), (5,), (5,)] end - -@testset "batch" begin - xs = [[1,2,3],[4,5,6]] - X = [1 4; 2 5; 3 6] - @test batch(xs) == X -end From 9d1d5187f349252365e73adb7a2da66caf29bfcf Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Thu, 23 Aug 2018 10:56:31 +0100 Subject: [PATCH 089/121] fix activations for 1.0 --- src/layers/basic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 5e5a88bd..3e887472 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -38,7 +38,7 @@ function Base.show(io::IO, c::Chain) print(io, ")") end -activations(c::Chain, x) = accumulate((x, m) -> m(x), x, c.layers) +activations(c::Chain, x) = accumulate((x, m) -> m(x), c.layers, init = x) """ Dense(in::Integer, out::Integer, σ = identity) From dfe7578216e3d2748eb87b0f2277980597720e18 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Thu, 23 Aug 2018 11:29:43 +0100 Subject: [PATCH 090/121] test repeat fix --- test/tracker.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/tracker.jl b/test/tracker.jl index 6cf4aba8..03d14c35 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -65,7 +65,7 @@ end @test gradtest(vcatf, rand(5)', rand(2,5)) end - + @testset for hcatf in [hcat, cat2] @test gradtest(hcatf, rand(5), rand(5)) @test gradtest(hcatf, rand(5)', rand(5)') @@ -117,6 +117,7 @@ end @test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6)) +@test gradtest(x -> repeat(x; inner=2), rand(5)) @test gradtest(x -> repeat(x; inner=2, outer=3), rand(5)) @test gradtest(x -> repeat(x; inner=(2,2,1), outer=(1,1,3)), rand(5,4,3)) From 6c97846551a45742959d3982e79b831c8f76a6a2 Mon Sep 17 00:00:00 2001 From: boathit Date: Thu, 23 Aug 2018 20:47:43 +0800 Subject: [PATCH 091/121] rename argmax as onecold --- src/onehot.jl | 9 ++++++--- test/onehot.jl | 12 ++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/onehot.jl b/src/onehot.jl index aa19e4d4..1e1a0c0f 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -56,11 +56,14 @@ onehotbatch(ls, labels, unk...) = import Base:argmax -argmax(y::AbstractVector, labels) = +onecold(y::AbstractVector, labels = 1:length(y)) = labels[something(findfirst(isequal(maximum(y)), y), 0)] -argmax(y::AbstractMatrix, labels) = - dropdims(mapslices(y -> argmax(y, labels), y, dims=1), dims=1) +onecold(y::AbstractMatrix, labels...) = + dropdims(mapslices(y -> onecold(y, labels...), y, dims=1), dims=1) + +@deprecate argmax(y::AbstractVector, labels::AbstractVector) onecold(y, labels) +@deprecate argmax(y::AbstractMatrix, labels::AbstractVector) onecold(y, labels) # Ambiguity hack diff --git a/test/onehot.jl b/test/onehot.jl index 28738f60..7191b0b3 100644 --- a/test/onehot.jl +++ b/test/onehot.jl @@ -1,13 +1,13 @@ -using Flux:argmax +using Flux:onecold using Test @testset "argmax" begin a = [1, 2, 5, 3.] 
A = [1 20 5; 2 7 6; 3 9 10; 2 1 14] labels = ['A', 'B', 'C', 'D'] - - @test argmax(a) == 3 - @test argmax(A) == CartesianIndex(1, 2) - @test argmax(a, labels) == 'C' - @test argmax(A, labels) == ['C', 'A', 'D'] + + @test onecold(a) == 3 + @test onecold(A) == [3, 1, 4] + @test onecold(a, labels) == 'C' + @test onecold(A, labels) == ['C', 'A', 'D'] end From 7bfe4313211c6f38f034a5659932f033e37a0f79 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Thu, 23 Aug 2018 20:58:58 +0800 Subject: [PATCH 092/121] Fix issue #323 --- src/tracker/array.jl | 6 +++++- src/tracker/scalar.jl | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index ce72755d..35d2c39f 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,4 +1,4 @@ -import Base: *, == +import Base: *, ==, ≈ import LinearAlgebra using Statistics @@ -64,6 +64,10 @@ x::TrackedArray == y = data(x) == y y == x::TrackedArray = y == data(x) x::TrackedArray == y::TrackedArray = data(x) == data(y) +x::TrackedArray ≈ y = data(x) ≈ y +y ≈ x::TrackedArray = y ≈ data(x) +x::TrackedArray ≈ y::TrackedArray = data(x) ≈ data(y) + # Array Stdlib Base.getindex(xs::TrackedArray, i...) = track(getindex, xs, i...) diff --git a/src/tracker/scalar.jl b/src/tracker/scalar.jl index 9ff1895a..03892c46 100644 --- a/src/tracker/scalar.jl +++ b/src/tracker/scalar.jl @@ -32,6 +32,7 @@ Base.convert(::Type{TrackedReal{T}}, x::TrackedReal{S}) where {T,S} = Base.:(<)(x::TrackedReal, y::TrackedReal) = data(x) < data(y) Base.:(==)(x::TrackedReal, y::TrackedReal) = data(x) == data(y) +Base.:(≈)(x::TrackedReal, y::TrackedReal) = data(x) ≈ data(y) Base.eps(x::TrackedReal) = eps(data(x)) From dcde6d22173b5eb452c6415880cba1b1debef9e4 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Thu, 23 Aug 2018 15:44:28 +0100 Subject: [PATCH 093/121] tweaks --- docs/src/data/onehot.md | 12 ++++++------ src/onehot.jl | 11 +++++------ test/onehot.jl | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/src/data/onehot.md b/docs/src/data/onehot.md index 6a3cc64c..0bc3531b 100644 --- a/docs/src/data/onehot.md +++ b/docs/src/data/onehot.md @@ -3,7 +3,7 @@ It's common to encode categorical variables (like `true`, `false` or `cat`, `dog`) in "one-of-k" or ["one-hot"](https://en.wikipedia.org/wiki/One-hot) form. Flux provides the `onehot` function to make this easy. ``` -julia> using Flux: onehot +julia> using Flux: onehot, onecold julia> onehot(:b, [:a, :b, :c]) 3-element Flux.OneHotVector: @@ -18,22 +18,22 @@ julia> onehot(:c, [:a, :b, :c]) true ``` -The inverse is `argmax` (which can take a general probability distribution, as well as just booleans). +The inverse is `onecold` (which can take a general probability distribution, as well as just booleans). ```julia -julia> argmax(ans, [:a, :b, :c]) +julia> onecold(ans, [:a, :b, :c]) :c -julia> argmax([true, false, false], [:a, :b, :c]) +julia> onecold([true, false, false], [:a, :b, :c]) :a -julia> argmax([0.3, 0.2, 0.5], [:a, :b, :c]) +julia> onecold([0.3, 0.2, 0.5], [:a, :b, :c]) :c ``` ## Batches -`onehotbatch` creates a batch (matrix) of one-hot vectors, and `argmax` treats matrices as batches. +`onehotbatch` creates a batch (matrix) of one-hot vectors, and `onecold` treats matrices as batches. ```julia julia> using Flux: onehotbatch diff --git a/src/onehot.jl b/src/onehot.jl index 1e1a0c0f..5d902c77 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -54,16 +54,15 @@ end onehotbatch(ls, labels, unk...) = OneHotMatrix(length(labels), [onehot(l, labels, unk...) 
for l in ls]) -import Base:argmax - -onecold(y::AbstractVector, labels = 1:length(y)) = - labels[something(findfirst(isequal(maximum(y)), y), 0)] +onecold(y::AbstractVector, labels = 1:length(y)) = labels[Base.argmax(y)] onecold(y::AbstractMatrix, labels...) = dropdims(mapslices(y -> onecold(y, labels...), y, dims=1), dims=1) -@deprecate argmax(y::AbstractVector, labels::AbstractVector) onecold(y, labels) -@deprecate argmax(y::AbstractMatrix, labels::AbstractVector) onecold(y, labels) +function argmax(xs...) + Base.depwarn("`argmax(...) is deprecated, use `onecold(...)` instead.", :argmax) + return onecold(xs...) +end # Ambiguity hack diff --git a/test/onehot.jl b/test/onehot.jl index 7191b0b3..b0177f3e 100644 --- a/test/onehot.jl +++ b/test/onehot.jl @@ -1,11 +1,11 @@ using Flux:onecold using Test -@testset "argmax" begin +@testset "onecold" begin a = [1, 2, 5, 3.] A = [1 20 5; 2 7 6; 3 9 10; 2 1 14] labels = ['A', 'B', 'C', 'D'] - + @test onecold(a) == 3 @test onecold(A) == [3, 1, 4] @test onecold(a, labels) == 'C' From 634d34686ee2278f61ac62cf7e93d21cfdf6980c Mon Sep 17 00:00:00 2001 From: Yueh-Hua Tu Date: Fri, 24 Aug 2018 10:31:13 +0800 Subject: [PATCH 094/121] Add new constructors and test --- docs/src/models/layers.md | 4 ++-- src/layers/conv.jl | 36 +++++++++++++++++++++++------------- test/layers/conv.jl | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 test/layers/conv.jl diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 070f6737..4bbb2ba0 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -6,8 +6,8 @@ These core layers form the foundation of almost all neural networks. Chain Dense Conv -Maxpool -Meanpool +MaxPool +MeanPool ``` ## Recurrent Layers diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f074e77f..0f9243ef 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -53,42 +53,52 @@ end """ - Maxpool(k) + MaxPool(k) Maxpooling layer. `k` stands for the size of the window for each dimension of the input. Takes the keyword arguments `pad` and `stride`. """ -struct Maxpool{N} +struct MaxPool{N} k::NTuple{N,Int} pad::NTuple{N,Int} stride::NTuple{N,Int} - Maxpool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) + MaxPool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) end -(m::Maxpool)(x) = maxpool(x, m.k; pad = m.pad, stride = m.stride) +function MaxPool{N}(k::Int; pad = 0, stride = k) where N + k_ = Tuple(repeat([k, ], N)) + MaxPool(k_; pad = map(_->pad,k_), stride=map(_->stride,k_)) +end -function Base.show(io::IO, m::Maxpool) - print(io, "Maxpool(", m.k, ", ", m.pad, ", ", m.stride, ")") +(m::MaxPool)(x) = maxpool(x, m.k; pad = m.pad, stride = m.stride) + +function Base.show(io::IO, m::MaxPool) + print(io, "MaxPool(", m.k, ", ", m.pad, ", ", m.stride, ")") end """ - Meanpool(k) + MeanPool(k) Meanpooling layer. `k` stands for the size of the window for each dimension of the input. Takes the keyword arguments `pad` and `stride`. 
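(Illustrative aside, not part of the original patch: both pooling layers simply forward to NNlib's `maxpool`/`meanpool`, so the window, `pad` and `stride` determine the output size in the usual way. A minimal sketch, assuming a 4×4 single-channel, single-observation input:)

```julia
using Flux: maxpool, meanpool

x = reshape(Float64.(1:16), 4, 4, 1, 1)                   # W × H × C × N
size(maxpool(x, (2, 2)))                                  # (2, 2, 1, 1): stride defaults to the window
size(meanpool(x, (2, 2), pad = (0, 0), stride = (1, 1)))  # (3, 3, 1, 1): overlapping 2×2 windows
```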
""" -struct Meanpool{N} +struct MeanPool{N} k::NTuple{N,Int} pad::NTuple{N,Int} stride::NTuple{N,Int} - Meanpool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) + MeanPool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) end -(m::Meanpool)(x) = meanpool(x, m.k; pad = m.pad, stride = m.stride) - -function Base.show(io::IO, m::Meanpool) - print(io, "Meanpool(", m.k, ", ", m.pad, ", ", m.stride, ")") +function MeanPool{N}(k::Int; pad = 0, stride = k) where N + k_ = Tuple(repeat([k, ], N)) + MeanPool(k_; pad = map(_->pad,k_), stride=map(_->stride,k_)) +end + +(m::MeanPool)(x) = meanpool(x, m.k; pad = m.pad, stride = m.stride) + +function Base.show(io::IO, m::MeanPool) + print(io, "MeanPool(", m.k, ", ", m.pad, ", ", m.stride, ")") end diff --git a/test/layers/conv.jl b/test/layers/conv.jl new file mode 100644 index 00000000..2e5e63dd --- /dev/null +++ b/test/layers/conv.jl @@ -0,0 +1,34 @@ +using Test +using Flux: Chain, Conv, MaxPool, MeanPool +using Base.conv + +@testset "pooling" begin + mp = MaxPool((2, 2)) + + @testset "maxpooling" begin + @test MaxPool{2}(2) == mp + @test MaxPool{2}(2; pad=1, stride=3) == MaxPool((2, 2); pad=(1, 1), stride=(3, 3)) + end + + mp = MeanPool((2, 2)) + + @testset "meanpooling" begin + @test MeanPool{2}(2) == mp + @test MeanPool{2}(2; pad=1, stride=3) == MeanPool((2, 2); pad=(1, 1), stride=(3, 3)) + end +end + +@testset "cnn" begin + r = zeros(28, 28) + m = Chain( + Conv((2, 2), 1=>16, relu), + MaxPool{2}(2), + Conv((2, 2), 16=>8, relu), + MaxPool{2}(2), + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), softmax) + + @testset "inference" begin + @test size(m(r)) == (10, ) + end +end From c035fe22d7012d2d3b0c236513a2a20cea63a8e9 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 24 Aug 2018 13:08:03 +0530 Subject: [PATCH 095/121] added deprecation warning --- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index c4828c9e..9c7fd605 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -20,4 +20,4 @@ using Flux.Tracker: TrackedArray Param(x::TrackedArray) = Param(x.data, x.grad) # Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) -end +end \ No newline at end of file diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 007613b6..06c8cd0e 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,5 +1,6 @@ using Juno using Flux.Tracker: back! +import Base.depwarn runall(f) = f runall(fs::AbstractVector) = () -> foreach(call, fs) @@ -59,7 +60,10 @@ function train!(loss, data, opt; cb = () -> ()) l = loss(d...) 
@interrupts back!(l) opt() - cb() == :stop && break + if cb() == :stop + depwarn("Use of `:stop` is deprecated; use `Flux.stop()` instead", :stop) + break + end catch ex if ex isa StopException break From e13d28a7a2bc3b845cebb99c147e1a866dfa0ac5 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 24 Aug 2018 13:44:21 +0100 Subject: [PATCH 096/121] cruft --- src/tracker/array.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index ce72755d..cef4463d 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -286,15 +286,6 @@ x::TrackedVector * y::TrackedVector = track(*, x, y) @grad a::AbstractMatrix * b::AbstractVecOrMat = data(a)*data(b), Δ -> (Δ * transpose(b), transpose(a) * Δ) -# @grad function (a::AbstractMatrix * b::AbstractVecOrMat) -# # @show size(a) size(b) -# data(a)*data(b), function (Δ) -# @show size(Δ) size(b) size(Δ*transpose(b)) size(Δ*transpose(data(b))) -# @show typeof(Δ) typeof(b) -# (Δ * transpose(b), transpose(a) * Δ) -# end -# end - # NNlib using NNlib From 86cf22675fd82ddab34d111aa36002a728284f15 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 24 Aug 2018 14:07:08 +0100 Subject: [PATCH 097/121] rewrite broadcast --- src/tracker/array.jl | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index cef4463d..5e76ddf4 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -327,35 +327,33 @@ end using ForwardDiff: Dual, partials, value -_size(x::AbstractArray) = size(x) -_size(x) = () +trim(x, Δ) = reshape(Δ, ntuple(i -> size(Δ, i), Val(ndims(x)))) -dualify(xs, n) = xs -dualify(xs::AbstractArray, ps) = map(x -> Dual(x, ps), xs) -dualify(xs::Real, ps) = Dual(xs, ps) +unbroadcast(x::AbstractArray, Δ) = + size(x) == size(Δ) ? Δ : + length(x) == length(Δ) ? trim(x, Δ) : + trim(x, sum(Δ, dims = ntuple(i -> size(x, i) == 1 ? i : ndims(Δ)+1, Val(ndims(Δ))))) -unbroadcast(x::Tuple, Δ) = - x == size(Δ) ? Δ : - reshape(sum(Δ, dims = filter(n -> n > length(x) || x[n] == 1, 1:ndims(Δ))), x) +unbroadcast(x::Number, Δ) = sum(Δ) +unbroadcast(x::Base.RefValue{<:Function}, _) = nothing +unbroadcast(x::Base.RefValue{<:Val}, _) = nothing -unbroadcast(x::Tuple{}, Δ) = sum(Δ) +dual(x, p) = x +dual(x::Real, p) = Dual(x, p) -function getpartial(Δ, x, i) - @inbounds p = getindex(partials(x), i) - return Δ * p +function partial(f::F, Δ, i, args::Vararg{Any,N}) where {F,N} + dargs = ntuple(j -> dual(args[j], i==j), Val(N)) + return Δ * f(dargs...).partials[1] end -function ∇broadcast(f, args::Vararg{Any,N}) where N - sizes = _size.(args) - dargs = map((x,i) -> dualify(data(x), ntuple(j -> i==j, Val(N))), args, ntuple(identity, Val(N))) - out = broadcast(f, dargs...) - eltype(out) <: Dual || return out - y = value.(out) - back = function (Δ_) - Δ = data(Δ_) - Δargs = ntuple(i -> getpartial.(Δ, out, i), Val(N)) - dxs = map((x, Δ) -> unbroadcast(x, Δ), sizes, Δargs) - nobacksies(:broadcast, dxs) +@inline function ∇broadcast(f::F, args::Vararg{Any,N}) where {F,N} + y = broadcast(f, data.(args)...) 
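  # (Editorial sketch, not part of the original patch: `partial` wraps the real
  # arguments in `ForwardDiff.Dual`s, giving only the i-th a unit partial, so one
  # forward evaluation of `f` yields the i-th sensitivity. For `*` at (2.0, 3.0):
  #   partial(*, 1.0, 1, 2.0, 3.0)  # == 3.0, the derivative with respect to x
  #   partial(*, 1.0, 2, 2.0, 3.0)  # == 2.0, the derivative with respect to y
  # `unbroadcast` then sums each sensitivity over the broadcast dimensions so the
  # result matches the shape of the corresponding argument.)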
+ eltype(y) <: Real || return y + eltype(y) == Bool && return y + function back(Δ) + Δargs = ntuple(i -> partial.(f, data(Δ), i, args...), Val(N)) + dxs = unbroadcast.(args, Δargs) + return nobacksies(:broadcast, dxs) end # So we can return non-tracked arrays track(Call(back, tracker.(args)), y) From 7d6ec2365fcc65cba44deb27f9ea6e1b3f401140 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 24 Aug 2018 14:30:39 +0100 Subject: [PATCH 098/121] fixes #367 --- src/tracker/array.jl | 2 +- test/cuda/cuda.jl | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 5e76ddf4..728eef29 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -48,7 +48,7 @@ back!(::TrackedArray) = error("Value is not scalar; use `back!(sum(x))` or `back # Fallthrough methods -for f in :[Base.size, Base.ndims].args +for f in :[Base.size, Base.ndims, Base.collect].args @eval @inline $f(x::TrackedArray, a...) = $f(data(x), a...) end diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index 576c0f5d..c9ee95c6 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -26,6 +26,10 @@ x = [1,2,3] cx = gpu(x) @test Flux.crossentropy(x,x) ≈ Flux.crossentropy(cx,cx) +xs = param(rand(5,5)) +ys = Flux.onehotbatch(1:5,1:5) +@test collect(cu(xs) .+ cu(ys)) ≈ collect(xs .+ ys) + c = gpu(Conv((2,2),3=>4)) l = c(gpu(rand(10,10,3,2))) Flux.back!(sum(l)) From 4ac76c35b0ede5d9c7dc1134f732190543eb499f Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 25 Aug 2018 14:51:40 +0800 Subject: [PATCH 099/121] =?UTF-8?q?fix=20MethodError=20for=20=3D=3D=20and?= =?UTF-8?q?=20=E2=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ```julia param([2]).^2 == [4.0] ERROR: MethodError: ==(::TrackedArray{…,Array{Float64,1}}, ::Array{Float64,1}) is ambiguous. Candidates: ==(x::TrackedArray, y) in Main.Flux.Tracker at /Users/jc/.julia/dev/Flux/src/tracker/array.jl:63 ==(A::AbstractArray, B::AbstractArray) in Base at abstractarray.jl:1686 Possible fix, define ==(::TrackedArray, ::AbstractArray) ``` --- src/tracker/array.jl | 14 ++++++-------- src/tracker/scalar.jl | 8 +++++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 35d2c39f..923b925c 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,4 +1,4 @@ -import Base: *, ==, ≈ +import Base: * import LinearAlgebra using Statistics @@ -60,13 +60,11 @@ Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...) 
= Base.similar(x::TrackedArray, T::Type) = similar(data(x), T) -x::TrackedArray == y = data(x) == y -y == x::TrackedArray = y == data(x) -x::TrackedArray == y::TrackedArray = data(x) == data(y) - -x::TrackedArray ≈ y = data(x) ≈ y -y ≈ x::TrackedArray = y ≈ data(x) -x::TrackedArray ≈ y::TrackedArray = data(x) ≈ data(y) +for op in [:(==), :≈] + @eval Base.$op(x::TrackedArray, y::AbstractArray) = Base.$op(data(x), y) + @eval Base.$op(x::AbstractArray, y::TrackedArray) = Base.$op(x, data(y)) + @eval Base.$op(x::TrackedArray, y::TrackedArray) = Base.$op(data(x), data(y)) +end # Array Stdlib diff --git a/src/tracker/scalar.jl b/src/tracker/scalar.jl index 03892c46..9e987333 100644 --- a/src/tracker/scalar.jl +++ b/src/tracker/scalar.jl @@ -30,9 +30,11 @@ Base.convert(::Type{TrackedReal{T}}, x::Real) where T = TrackedReal(convert(T, x Base.convert(::Type{TrackedReal{T}}, x::TrackedReal{S}) where {T,S} = error("Not implemented: convert tracked $S to tracked $T") -Base.:(<)(x::TrackedReal, y::TrackedReal) = data(x) < data(y) -Base.:(==)(x::TrackedReal, y::TrackedReal) = data(x) == data(y) -Base.:(≈)(x::TrackedReal, y::TrackedReal) = data(x) ≈ data(y) +for op in [:(==), :≈, :<] + @eval Base.$op(x::TrackedReal, y::Number) = Base.$op(data(x), y) + @eval Base.$op(x::Number, y::TrackedReal) = Base.$op(x, data(y)) + @eval Base.$op(x::TrackedReal, y::TrackedReal) = Base.$op(data(x), data(y)) +end Base.eps(x::TrackedReal) = eps(data(x)) From 81811a01ce920af9e99cda1d642773afc673fc73 Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 25 Aug 2018 14:52:08 +0800 Subject: [PATCH 100/121] =?UTF-8?q?Update=20testset=20for=20=3D=3D,=20?= =?UTF-8?q?=E2=89=88,=20and=20 meanpool(x, (2,2)), rand(10, 10, 3, 2)) @test gradtest(x -> meanpool(x, (2,2,2)), rand(5, 5, 5, 3, 2)) -@test (param([1,2,3]) .< 2) == [true, false, false] +@testset "equality & order" begin + # TrackedReal + @test param(2)^2 == param(4) + @test param(2)^2 == 4 + @test 4 == param(2)^2 -@test param(2)^2 == 4.0 + @test param(2)^2 ≈ param(4) + @test param(2)^2 ≈ 4 + @test 4 ≈ param(2)^2 + + @test (param([1,2,3]) .< 2) == [true, false, false] + @test (param([1,2,3]) .<= 2) == [true, true, false] + @test (2 .> param([1,2,3])) == [true, false, false] + @test (2 .>= param([1,2,3])) == [true, true, false] + + # TrackedArray + @test param([1,2,3]).^2 == param([1,4,9]) + @test [1,2,3].^2 == param([1,4,9]) + @test param([1,2,3]).^2 == [1,4,9] + + @test param([1,2,3]).^2 ≈ param([1,4,9]) + @test [1,2,3].^2 ≈ param([1,4,9]) + @test param([1,2,3]).^2 ≈ [1,4,9] +end @testset "reshape" begin x = reshape(param(rand(2,2,2)), 4, 2) From 0c4fb9655a20030c93efd2bac8671d1c55ee2a5d Mon Sep 17 00:00:00 2001 From: Johnny Chen Date: Sat, 25 Aug 2018 15:12:01 +0800 Subject: [PATCH 101/121] Fix a bug --- src/tracker/scalar.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tracker/scalar.jl b/src/tracker/scalar.jl index 9e987333..81ccb9a3 100644 --- a/src/tracker/scalar.jl +++ b/src/tracker/scalar.jl @@ -31,8 +31,8 @@ Base.convert(::Type{TrackedReal{T}}, x::TrackedReal{S}) where {T,S} = error("Not implemented: convert tracked $S to tracked $T") for op in [:(==), :≈, :<] - @eval Base.$op(x::TrackedReal, y::Number) = Base.$op(data(x), y) - @eval Base.$op(x::Number, y::TrackedReal) = Base.$op(x, data(y)) + @eval Base.$op(x::TrackedReal, y::Real) = Base.$op(data(x), y) + @eval Base.$op(x::Real, y::TrackedReal) = Base.$op(x, data(y)) @eval Base.$op(x::TrackedReal, y::TrackedReal) = Base.$op(data(x), data(y)) end From 
a964debd8acc0b0f8a4f0d8efc4e8b4a40434eb3 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 28 Aug 2018 15:02:47 +0530 Subject: [PATCH 102/121] fixed example in docs --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 61eb1258..ebcf9c16 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -25,7 +25,7 @@ This would trigger the train loop to stop and exit. # Example callback: cb = function () - accuracy() > 0.9 && stop() + accuracy() > 0.9 && Flux.stop() end ``` """ From 2ca189bc964162b01fc07e9252a62e55b4f3a8b0 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Tue, 28 Aug 2018 10:54:50 +0100 Subject: [PATCH 103/121] newlines --- src/Flux.jl | 2 +- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 2 +- src/utils.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index e0b40941..768e851c 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -37,4 +37,4 @@ include("data/Data.jl") @init @require CuArrays="3a865a2d-5b23-5a0f-bc46-62713ec82fae" include("cuda/cuda.jl") -end # module \ No newline at end of file +end # module diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index 9c7fd605..c4828c9e 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -20,4 +20,4 @@ using Flux.Tracker: TrackedArray Param(x::TrackedArray) = Param(x.data, x.grad) # Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) -end \ No newline at end of file +end diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 8a6cf615..09893873 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -93,4 +93,4 @@ macro epochs(n, ex) @info "Epoch $i" $(esc(ex)) end) -end \ No newline at end of file +end diff --git a/src/utils.jl b/src/utils.jl index c6bbdb13..c53f7864 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -152,4 +152,4 @@ function jacobian(m,x) xp.grad .*= 0 # Reset gradient accumulator end J' -end \ No newline at end of file +end From 53be49b102f23c118439dd0fd41713d81dedc771 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Tue, 28 Aug 2018 11:02:38 +0100 Subject: [PATCH 104/121] fix #377 --- src/tracker/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 728eef29..ffa3a89e 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -384,7 +384,7 @@ end using Requires # https://github.com/FluxML/Flux.jl/issues/353 -@init @eval Base.Broadcast begin +@init Requires.isprecompiling() || @eval Base.Broadcast begin function flatten(bc::Broadcasted{Style}) where {Style} isflat(bc) && return bc args = cat_nested(bc) From abcefb8ae30fcd745d1ba313a04f68b584fc5879 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Wed, 29 Aug 2018 18:36:24 +0100 Subject: [PATCH 105/121] fix foldl in tutorial --- docs/src/models/basics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index da2a125b..88fa0a05 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -172,7 +172,7 @@ using Flux layers = [Dense(10, 5, σ), Dense(5, 2), softmax] -model(x) = foldl((x, m) -> m(x), x, layers) +model(x) = foldl((x, m) -> m(x), layers, init = x) model(rand(10)) # => 2-element vector ``` From a012d0bd513ef7e9ae56c72970aad943b0f1c572 Mon Sep 17 00:00:00 2001 From: Pietro Vertechi Date: Wed, 29 Aug 2018 23:34:41 +0100 Subject: [PATCH 106/121] fix vecnorm in docs --- 
docs/src/models/regularisation.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/src/models/regularisation.md b/docs/src/models/regularisation.md index cd53544f..370a53d9 100644 --- a/docs/src/models/regularisation.md +++ b/docs/src/models/regularisation.md @@ -1,7 +1,7 @@ # Regularisation Applying regularisation to model parameters is straightforward. We just need to -apply an appropriate regulariser, such as `vecnorm`, to each model parameter and +apply an appropriate regulariser, such as `norm`, to each model parameter and add the result to the overall loss. For example, say we have a simple regression. @@ -15,12 +15,12 @@ loss(x, y) = crossentropy(softmax(m(x)), y) We can regularise this by taking the (L2) norm of the parameters, `m.W` and `m.b`. ```julia -penalty() = vecnorm(m.W) + vecnorm(m.b) +penalty() = norm(m.W) + norm(m.b) loss(x, y) = crossentropy(softmax(m(x)), y) + penalty() ``` When working with layers, Flux provides the `params` function to grab all -parameters at once. We can easily penalise everything with `sum(vecnorm, params)`. +parameters at once. We can easily penalise everything with `sum(norm, params)`. ```julia julia> params(m) @@ -28,7 +28,7 @@ julia> params(m) param([0.355408 0.533092; … 0.430459 0.171498]) param([0.0, 0.0, 0.0, 0.0, 0.0]) -julia> sum(vecnorm, params(m)) +julia> sum(norm, params(m)) 26.01749952921026 (tracked) ``` @@ -40,7 +40,7 @@ m = Chain( Dense(128, 32, relu), Dense(32, 10), softmax) -loss(x, y) = crossentropy(m(x), y) + sum(vecnorm, params(m)) +loss(x, y) = crossentropy(m(x), y) + sum(norm, params(m)) loss(rand(28^2), rand(10)) ``` @@ -57,6 +57,6 @@ julia> activations(c, rand(10)) param([0.0330606, -0.456104]) param([0.61991, 0.38009]) -julia> sum(vecnorm, ans) +julia> sum(norm, ans) 2.639678767773633 (tracked) ``` From 93c4a6b4b5c660956f345d8e0e1871ad880afb8a Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Tue, 4 Sep 2018 13:37:54 +0100 Subject: [PATCH 107/121] fixes #343 --- REQUIRE | 2 +- src/data/mnist.jl | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/REQUIRE b/REQUIRE index 7164de5a..ad3306d6 100644 --- a/REQUIRE +++ b/REQUIRE @@ -4,7 +4,7 @@ MacroTools 0.3.3 NNlib Requires Adapt -GZip +CodecZlib Colors ZipFile AbstractTrees diff --git a/src/data/mnist.jl b/src/data/mnist.jl index c068bc7d..4397618d 100644 --- a/src/data/mnist.jl +++ b/src/data/mnist.jl @@ -1,11 +1,17 @@ module MNIST -using GZip, Colors +using CodecZlib, Colors const Gray = Colors.Gray{Colors.N0f8} const dir = joinpath(@__DIR__, "../../deps/mnist") +function gzopen(f, file) + open(file) do io + f(GzipDecompressorStream(io)) + end +end + function load() mkpath(dir) cd(dir) do @@ -17,7 +23,7 @@ function load() @info "Downloading MNIST dataset" download("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz") open(file, "w") do io - write(io, GZip.open(read, "$file.gz")) + write(io, gzopen(read, "$file.gz")) end end end From 1e0fd07b097f36ff1d70675256ed6e7c7ed66287 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Tue, 4 Sep 2018 14:30:02 +0100 Subject: [PATCH 108/121] use `expand` --- src/Flux.jl | 2 +- src/layers/conv.jl | 27 ++++++++++----------------- test/layers/conv.jl | 16 ++++------------ 3 files changed, 15 insertions(+), 30 deletions(-) diff --git a/src/Flux.jl b/src/Flux.jl index 614eeaf7..8c959fec 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -5,7 +5,7 @@ module Flux using MacroTools, Juno, Requires, Reexport, Statistics, Random using MacroTools: @forward -export 
Chain, Dense, RNN, LSTM, GRU, Conv, +export Chain, Dense, RNN, LSTM, GRU, Conv, MaxPool, MeanPool, Dropout, LayerNorm, BatchNorm, params, mapleaves, cpu, gpu diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 5b239751..dbf8ccf9 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,6 +1,6 @@ using NNlib: conv -@generated sub2(::Type{Val{N}}) where N = :(Val($(N-2))) +@generated sub2(::Val{N}) where N = :(Val($(N-2))) expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) @@ -28,7 +28,7 @@ end Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} = - Conv(σ, w, b, expand.(sub2(Val{N}), (stride, pad, dilation))...) + Conv(σ, w, b, expand.(sub2(Val(N)), (stride, pad, dilation))...) Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, stride = 1, pad = 0, dilation = 1) where N = @@ -55,7 +55,7 @@ end """ MaxPool(k) -Maxpooling layer. `k` stands for the size of the window for each dimension of the input. +Max pooling layer. `k` stands for the size of the window for each dimension of the input. Takes the keyword arguments `pad` and `stride`. """ @@ -63,25 +63,21 @@ struct MaxPool{N} k::NTuple{N,Int} pad::NTuple{N,Int} stride::NTuple{N,Int} - MaxPool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) end -function MaxPool{N}(k::Int; pad = 0, stride = k) where N - k_ = Tuple(repeat([k, ], N)) - MaxPool(k_; pad = map(_->pad,k_), stride=map(_->stride,k_)) -end +MaxPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N = + MaxPool(k, expand(Val(N), pad), expand(Val(N), stride)) (m::MaxPool)(x) = maxpool(x, m.k; pad = m.pad, stride = m.stride) function Base.show(io::IO, m::MaxPool) - print(io, "MaxPool(", m.k, ", ", m.pad, ", ", m.stride, ")") + print(io, "MaxPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end - """ MeanPool(k) -Meanpooling layer. `k` stands for the size of the window for each dimension of the input. +Mean pooling layer. `k` stands for the size of the window for each dimension of the input. Takes the keyword arguments `pad` and `stride`. 
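(Illustrative aside, not part of the original patch: with the `expand` helper the integer forms of `pad` and `stride` are promoted to tuples, and `stride` defaults to the window size. A small sketch, assuming these constructors:)

```julia
m1 = MaxPool((2, 2), pad = 1, stride = 1)
m2 = MaxPool((2, 2), pad = (1, 1), stride = (1, 1))
m1.pad == m2.pad && m1.stride == m2.stride   # true

m = MeanPool((3, 3))        # stride defaults to (3, 3), pad to (0, 0)
size(m(rand(9, 9, 1, 1)))   # (3, 3, 1, 1)
```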
""" @@ -89,16 +85,13 @@ struct MeanPool{N} k::NTuple{N,Int} pad::NTuple{N,Int} stride::NTuple{N,Int} - MeanPool(k::NTuple{N,Int}; pad = map(_->0,k), stride = k) where N = new{N}(k, pad, stride) end -function MeanPool{N}(k::Int; pad = 0, stride = k) where N - k_ = Tuple(repeat([k, ], N)) - MeanPool(k_; pad = map(_->pad,k_), stride=map(_->stride,k_)) -end +MeanPool(k::NTuple{N,Integer}; pad = 0, stride = k) where N = + MeanPool(k, expand(Val(N), pad), expand(Val(N), stride)) (m::MeanPool)(x) = meanpool(x, m.k; pad = m.pad, stride = m.stride) function Base.show(io::IO, m::MeanPool) - print(io, "MeanPool(", m.k, ", ", m.pad, ", ", m.stride, ")") + print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 2e5e63dd..07b8c290 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -1,21 +1,13 @@ using Test -using Flux: Chain, Conv, MaxPool, MeanPool +using Flux: Chain, Conv, MaxPool, MeanPool, maxpool, meanpool using Base.conv @testset "pooling" begin + x = randn(10, 10, 3, 2) mp = MaxPool((2, 2)) - - @testset "maxpooling" begin - @test MaxPool{2}(2) == mp - @test MaxPool{2}(2; pad=1, stride=3) == MaxPool((2, 2); pad=(1, 1), stride=(3, 3)) - end - + @test mp(x) == maxpool(x, (2,2)) mp = MeanPool((2, 2)) - - @testset "meanpooling" begin - @test MeanPool{2}(2) == mp - @test MeanPool{2}(2; pad=1, stride=3) == MeanPool((2, 2); pad=(1, 1), stride=(3, 3)) - end + @test mp(x) == meanpool(x, (2,2)) end @testset "cnn" begin From 1e90226077457249af527f69d9fb6018f21dc2e4 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Tue, 4 Sep 2018 14:35:20 +0100 Subject: [PATCH 109/121] actually run tests --- test/layers/conv.jl | 29 +++++++++++++---------------- test/runtests.jl | 1 + 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 07b8c290..5928bd75 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -1,8 +1,7 @@ -using Test -using Flux: Chain, Conv, MaxPool, MeanPool, maxpool, meanpool -using Base.conv +using Flux, Test +using Flux: maxpool, meanpool -@testset "pooling" begin +@testset "Pooling" begin x = randn(10, 10, 3, 2) mp = MaxPool((2, 2)) @test mp(x) == maxpool(x, (2,2)) @@ -10,17 +9,15 @@ using Base.conv @test mp(x) == meanpool(x, (2,2)) end -@testset "cnn" begin - r = zeros(28, 28) - m = Chain( - Conv((2, 2), 1=>16, relu), - MaxPool{2}(2), - Conv((2, 2), 16=>8, relu), - MaxPool{2}(2), - x -> reshape(x, :, size(x, 4)), - Dense(288, 10), softmax) +@testset "CNN" begin + r = zeros(28, 28, 1, 5) + m = Chain( + Conv((2, 2), 1=>16, relu), + MaxPool((2,2)), + Conv((2, 2), 16=>8, relu), + MaxPool((2,2)), + x -> reshape(x, :, size(x, 4)), + Dense(288, 10), softmax) - @testset "inference" begin - @test size(m(r)) == (10, ) - end + @test size(m(r)) == (10, 5) end diff --git a/test/runtests.jl b/test/runtests.jl index fd48e547..70d929bf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,6 +28,7 @@ include("onehot.jl") include("tracker.jl") include("layers/normalisation.jl") include("layers/stateless.jl") +include("layers/conv.jl") include("optimise.jl") include("data.jl") From 8b71350878667538fd3024d81dad760c92988b1b Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 5 Sep 2018 15:39:00 +0100 Subject: [PATCH 110/121] make travis happy maybe --- test/cuda/cuda.jl | 2 +- test/runtests.jl | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index c9ee95c6..16f90e89 100644 --- 
a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -1,7 +1,7 @@ using Flux, Flux.Tracker, CuArrays, Test using Flux: gpu -@info "Testing Flux/GPU" +@info "Testing GPU Support" @testset "CuArrays" begin diff --git a/test/runtests.jl b/test/runtests.jl index 70d929bf..7a55dca6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,14 +23,22 @@ insert!(LOAD_PATH, 2, "@v#.#") @testset "Flux" begin +@info "Testing Basics" + include("utils.jl") include("onehot.jl") -include("tracker.jl") +include("optimise.jl") +include("data.jl") + +@info "Testing Layers" + include("layers/normalisation.jl") include("layers/stateless.jl") include("layers/conv.jl") -include("optimise.jl") -include("data.jl") + +@info "Running Gradient Checks" + +include("tracker.jl") if Base.find_package("CuArrays") != nothing include("cuda/cuda.jl") From ec16a2c77dbf6ab8b92b0eecd11661be7a62feef Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 5 Sep 2018 15:55:08 +0100 Subject: [PATCH 111/121] todone: nicer syntax on 0.7 --- src/layers/recurrent.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl index 4064ed7b..3b40af04 100644 --- a/src/layers/recurrent.jl +++ b/src/layers/recurrent.jl @@ -128,8 +128,7 @@ function LSTMCell(in::Integer, out::Integer; return cell end -function (m::LSTMCell)(h_, x) - h, c = h_ # TODO: nicer syntax on 0.7 +function (m::LSTMCell)((h, c), x) b, o = m.b, size(h, 1) g = m.Wi*x .+ m.Wh*h .+ b input = σ.(gate(g, o, 1)) From b7eaf393fc5cd3a77a5b5959c25813edec947661 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 5 Sep 2018 16:01:57 +0100 Subject: [PATCH 112/121] docs updates --- docs/src/index.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index afeb2075..d381b194 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,18 +1,17 @@ # Flux: The Julia Machine Learning Library -Flux is a library for machine learning. It comes "batteries-included" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. The whole stack is implemented in clean Julia code (right down to the [GPU kernels](https://github.com/FluxML/CuArrays.jl)) and any part can be tweaked to your liking. +Flux is a library for machine learning. It comes "batteries-included" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles: + +* **Doing the obvious thing**. Flux has relatively few explicit APIs for features like regularisation or embeddings. Instead, writing down the mathematical form will work – and be fast. +* **You could have written Flux**. All of it, from [LSTMs](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131) to [GPU kernels](https://github.com/JuliaGPU/CuArrays.jl), is straightforward Julia code. When it doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/). If you need something different, you can easily roll your own. +* **Play nicely with others**. Flux works well with Julia libraries from [data frames](https://github.com/JuliaComputing/JuliaDB.jl) and [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/JuliaDiffEq/DifferentialEquations.jl), so you can easily build complex data processing pipelines that integrate Flux models. 
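As a small illustration of the first bullet above (the claim that writing down the mathematical form is enough), an embedding can be expressed directly as a matrix times a one-hot batch; the names `vocab`, `embedding` and `embed` below are made up for this sketch, and since `embedding` is a `param` it is tracked like any other weight:

```julia
using Flux

vocab = 1:1000
embedding = param(randn(64, length(vocab)))   # 64-dimensional embedding table

embed(tokens) = embedding * Flux.onehotbatch(tokens, vocab)

size(embed([3, 7, 512]))   # (64, 3)
```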
# Installation -Install [Julia 0.6.0 or later](https://julialang.org/downloads/), if you haven't already. +Download [Julia 1.0](https://julialang.org/) or later, if you haven't already. You can add Flux from using Julia's package manager, by typing `] add Flux` in the Julia prompt. -```julia -Pkg.add("Flux") -# Optional but recommended -Pkg.update() # Keep your packages up to date -Pkg.test("Flux") # Check things installed correctly -``` +If you have CUDA you can also run `] add CuArrays` to get GPU support; see [here](gpu.md) for more details. -Start with the [basics](models/basics.md). The [model zoo](https://github.com/FluxML/model-zoo/) is also a good starting point for many common kinds of models. +# Learning Flux -See [GPU support](gpu.md) for more details on installing and using Flux with GPUs. +There are several different ways to learn Flux. If you just want to get started writing models, the [model zoo](https://github.com/FluxML/model-zoo/) gives good starting points for many common ones. This documentation provides a reference to all of Flux's APIs, as well as a from-scratch introduction to Flux's take on models and how they work. Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts. From 193c4ded19290197fb27a4b058cffd34891073b6 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 5 Sep 2018 16:52:50 +0100 Subject: [PATCH 113/121] make docs on 1.0 --- .travis.yml | 4 ++-- docs/make.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9bf07dd6..b26597e9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,5 +15,5 @@ matrix: allow_failures: - julia: nightly after_success: - - julia -e 'Pkg.add("Documenter")' - - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' + - julia -e 'using Pkg; Pkg.add("Documenter"); Pkg.add("NNlib")' + - julia -e 'using Pkg; cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' diff --git a/docs/make.jl b/docs/make.jl index ed6a8c8b..b35beb3c 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -26,6 +26,6 @@ deploydocs( repo = "github.com/FluxML/Flux.jl.git", target = "build", osname = "linux", - julia = "0.6", + julia = "1.0", deps = nothing, make = nothing) From 395a35d137eccc5fc97d43b6b468c50953b77517 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Wed, 5 Sep 2018 17:03:41 +0100 Subject: [PATCH 114/121] better headings --- docs/src/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index d381b194..4fc58f72 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -6,12 +6,12 @@ Flux is a library for machine learning. It comes "batteries-included" with many * **You could have written Flux**. All of it, from [LSTMs](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131) to [GPU kernels](https://github.com/JuliaGPU/CuArrays.jl), is straightforward Julia code. When it doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/). If you need something different, you can easily roll your own. * **Play nicely with others**. 
Flux works well with Julia libraries from [data frames](https://github.com/JuliaComputing/JuliaDB.jl) and [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/JuliaDiffEq/DifferentialEquations.jl), so you can easily build complex data processing pipelines that integrate Flux models. -# Installation +## Installation Download [Julia 1.0](https://julialang.org/) or later, if you haven't already. You can add Flux from using Julia's package manager, by typing `] add Flux` in the Julia prompt. If you have CUDA you can also run `] add CuArrays` to get GPU support; see [here](gpu.md) for more details. -# Learning Flux +## Learning Flux There are several different ways to learn Flux. If you just want to get started writing models, the [model zoo](https://github.com/FluxML/model-zoo/) gives good starting points for many common ones. This documentation provides a reference to all of Flux's APIs, as well as a from-scratch introduction to Flux's take on models and how they work. Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts. From 6bbed07e96048503fad3dfd9dd3000b37781506c Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 7 Sep 2018 02:05:03 +0100 Subject: [PATCH 115/121] enable nested broadcast --- src/tracker/array.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index ffa3a89e..16f91d22 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -351,9 +351,9 @@ end eltype(y) <: Real || return y eltype(y) == Bool && return y function back(Δ) - Δargs = ntuple(i -> partial.(f, data(Δ), i, args...), Val(N)) - dxs = unbroadcast.(args, Δargs) - return nobacksies(:broadcast, dxs) + Δargs = ntuple(i -> partial.(f, Δ, i, args...), Val(N)) + dxs = map(unbroadcast, args, Δargs) + return dxs end # So we can return non-tracked arrays track(Call(back, tracker.(args)), y) From e7783ace12908712342e5d0760cf3c6ff468888f Mon Sep 17 00:00:00 2001 From: James Bradbury Date: Thu, 6 Sep 2018 18:38:11 -0700 Subject: [PATCH 116/121] 1.0 compat for `normalise` --- src/layers/stateless.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 6d040f4f..891ec230 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -47,7 +47,7 @@ logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ) Normalise each column of `x` to mean 0 and standard deviation 1. """ function normalise(x::AbstractVecOrMat) - μ′ = mean(x, 1) - σ′ = std(x, 1, mean = μ′) + μ′ = mean(x, dims = 1) + σ′ = std(x, dims = 1, mean = μ′) return (x .- μ′) ./ σ′ end From 8b9a98ed0129efb87a8a1f4d63e5c49b33c85869 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Tue, 11 Sep 2018 18:58:07 +0530 Subject: [PATCH 117/121] The sample gradient should not use the softdash While softdash is a very natural and mathematical way of representation, it can be very easily confused with the apostrophe used for LinAlg adjoint. Not worth and unnecessary confusion in a first example of the code. 
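Before the diff below, a short aside on the preceding patch ("1.0 compat for `normalise`"): on Julia 1.0 the reductions take `dims` as a keyword, which is all the fix changes. A minimal, self-contained sketch of the column-wise behaviour (values chosen arbitrarily):

```julia
using Statistics

x = [1.0 2.0;
     3.0 4.0;
     5.0 6.0]

μ = mean(x, dims = 1)            # 1×2 row of column means
σ = std(x, dims = 1, mean = μ)   # 1×2 row of column standard deviations
y = (x .- μ) ./ σ

mean(y, dims = 1)                # ≈ [0.0 0.0]
std(y, dims = 1)                 # ≈ [1.0 1.0]
```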
--- docs/src/models/basics.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index 88fa0a05..a0a39ab5 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -10,14 +10,14 @@ using Flux.Tracker f(x) = 3x^2 + 2x + 1 # df/dx = 6x + 2 -f′(x) = Tracker.gradient(f, x)[1] +df(x) = Tracker.gradient(f, x)[1] -f′(2) # 14.0 (tracked) +df(2) # 14.0 (tracked) # d²f/dx² = 6 -f′′(x) = Tracker.gradient(f′, x)[1] +d2f(x) = Tracker.gradient(df, x)[1] -f′′(2) # 6.0 (tracked) +d2f(2) # 6.0 (tracked) ``` (We'll learn more about why these numbers show up as `(tracked)` below.) From d797999fc5353a2b4973b606872f7dbd1bb86af6 Mon Sep 17 00:00:00 2001 From: Mike Innes Date: Fri, 14 Sep 2018 18:10:24 +0100 Subject: [PATCH 118/121] fix sentiment model --- src/data/sentiment.jl | 9 +++++---- test/data.jl | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/data/sentiment.jl b/src/data/sentiment.jl index a58cd9d4..56c9e8ea 100644 --- a/src/data/sentiment.jl +++ b/src/data/sentiment.jl @@ -4,7 +4,7 @@ using ZipFile using ..Data: deps function load() - isfile(deps("sentiment.zip")) || return + isfile(deps("sentiment.zip")) && return @info "Downloading sentiment treebank dataset" download("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip", deps("sentiment.zip")) @@ -26,9 +26,10 @@ totree_(n, a, b) = Tree{Any}((parse(Int, n), nothing), totree(a), totree(b)) totree(t::Expr) = totree_(t.args...) function parsetree(s) - s = replace(s, r"\$", s -> "\\\$") - s = replace(s, r"[^\s\(\)]+", s -> "\"$s\"") - s = replace(s, " ", ", ") + s = replace(s, "\\" => "") + s = replace(s, "\$" => "\\\$") + s = replace(s, r"[^ \n\(\)]+" => s -> "\"$s\"") + s = replace(s, " " => ", ") return totree(Meta.parse(s)) end diff --git a/test/data.jl b/test/data.jl index 7a27c651..9c2901cb 100644 --- a/test/data.jl +++ b/test/data.jl @@ -9,3 +9,5 @@ using Test @test MNIST.images()[1] isa Matrix @test MNIST.labels() isa Vector{Int64} + +@test Data.Sentiment.train() isa Vector{Data.Tree{Any}} From e803117e2591b9dc5a074bfacb49ca1aa72295dd Mon Sep 17 00:00:00 2001 From: Isaac Tay Date: Sat, 15 Sep 2018 16:45:04 +0800 Subject: [PATCH 119/121] updated loadparams! 
function --- src/treelike.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelike.jl b/src/treelike.jl index 3d83d448..9b3518d3 100644 --- a/src/treelike.jl +++ b/src/treelike.jl @@ -54,7 +54,7 @@ function loadparams!(m, xs) for (p, x) in zip(params(m), xs) size(p) == size(x) || error("Expected param size $(size(p)), got $(size(x))") - copy!(data(p), data(x)) + copyto!(data(p), data(x)) end end From d1318535878e9bc9edb3157f0cda2442a1680c14 Mon Sep 17 00:00:00 2001 From: Alex Bird Date: Wed, 19 Sep 2018 13:08:30 +0100 Subject: [PATCH 120/121] add inv/ldivide/rdivide + test --- src/tracker/array.jl | 37 +++++++++++++++++++++++++++++++++++++ test/tracker.jl | 5 +++++ 2 files changed, 42 insertions(+) diff --git a/src/tracker/array.jl b/src/tracker/array.jl index 6d3c3b3f..3d9836d0 100644 --- a/src/tracker/array.jl +++ b/src/tracker/array.jl @@ -1,6 +1,8 @@ import Base: * import LinearAlgebra +import LinearAlgebra: inv, \, / + using Statistics using LinearAlgebra: Transpose, Adjoint, diagm, diag @@ -205,6 +207,41 @@ Base.kron(a::TrackedMatrix, b::TrackedMatrix) = _kron(a, b) Base.kron(a::TrackedMatrix, b::AbstractMatrix) = _kron(a, b) Base.kron(a::AbstractMatrix, b::TrackedMatrix) = _kron(a, b) + +inv(A::TrackedArray) = Tracker.track(inv, A) +@grad function inv(A) + return inv(Tracker.data(A)), function (Δ) + Ainv = inv(A) + ∇A = - Ainv' * Δ * Ainv' + return (∇A, ) + end +end + +# (/) rdivide +A::TrackedArray / B::TrackedArray = Tracker.track(/, A, B) +A::AbstractVecOrMat / B::TrackedArray = Tracker.track(/, A, B) +A::TrackedArray / B::AbstractVecOrMat = Tracker.track(/, A, B) +@grad function (A / B) + return Tracker.data(A) / Tracker.data(B), function (Δ) + Binv = inv(B) + ∇B = - Binv' * A' * Δ * Binv' + return (Δ * Binv', ∇B) + end +end + +# (\) ldivide (left vec divide needs more work to resolve dispatch ambiguity) +A::TrackedArray \ B::TrackedArray = Tracker.track(\, A, B) +A::AbstractArray \ B::TrackedArray = Tracker.track(\, A, B) +A::TrackedArray \ B::AbstractVecOrMat = Tracker.track(\, A, B) +@grad function (A \ B) + return Tracker.data(A) \ Tracker.data(B), function (Δ) + Ainv = inv(A) + ∇A = - Ainv' * Δ * B' * Ainv' + return (∇A, Ainv' * Δ) + end +end + + # Reductions Base.sum(xs::TrackedArray; dims = :) = track(sum, xs, dims = dims) diff --git a/test/tracker.jl b/test/tracker.jl index 9a4cb793..a4772f2e 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -129,6 +129,11 @@ end @test gradtest(f-> Matrix(Diagonal(f)), rand(3)) +@test gradtest(W -> inv(log.(W * W)), (5,5)) +@test gradtest((A, B) -> A / B , (1,5), (5,5)) +@test gradtest((A, B) -> log.(A * A) / exp.(B * B), (5,5), (5,5)) +@test gradtest((A, B) -> log.(A * A) \ exp.(B * B), (5,5), (5,5)) + @testset "mean" begin @test gradtest(mean, rand(2, 3)) From 079614adb21dbbce878b5b1d0fb065332bb6651f Mon Sep 17 00:00:00 2001 From: Harry Date: Wed, 19 Sep 2018 16:45:11 +0100 Subject: [PATCH 121/121] Fix typo --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 4fc58f72..4b5668a1 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -3,7 +3,7 @@ Flux is a library for machine learning. It comes "batteries-included" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles: * **Doing the obvious thing**. Flux has relatively few explicit APIs for features like regularisation or embeddings. 
Instead, writing down the mathematical form will work – and be fast. -* **You could have written Flux**. All of it, from [LSTMs](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131) to [GPU kernels](https://github.com/JuliaGPU/CuArrays.jl), is straightforward Julia code. When it doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/). If you need something different, you can easily roll your own. +* **You could have written Flux**. All of it, from [LSTMs](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131) to [GPU kernels](https://github.com/JuliaGPU/CuArrays.jl), is straightforward Julia code. When in doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/). If you need something different, you can easily roll your own. * **Play nicely with others**. Flux works well with Julia libraries from [data frames](https://github.com/JuliaComputing/JuliaDB.jl) and [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/JuliaDiffEq/DifferentialEquations.jl), so you can easily build complex data processing pipelines that integrate Flux models. ## Installation