From d949b31aa5548e4fb87625def9662765a92879c9 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 14 Dec 2017 18:48:38 +0000 Subject: [PATCH 01/10] conv gradient --- src/tracker/lib.jl | 12 +++++++++++- src/tracker/numeric.jl | 2 +- test/tracker.jl | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/tracker/lib.jl b/src/tracker/lib.jl index f3221bd8..cac8d7d1 100644 --- a/src/tracker/lib.jl +++ b/src/tracker/lib.jl @@ -123,12 +123,22 @@ end # NNlib -import NNlib: softmax, ∇softmax +using NNlib +import NNlib: softmax, ∇softmax, conv2d softmax(xs::TrackedArray) = TrackedArray(Call(softmax, xs)) back(::typeof(softmax), Δ, xs) = @back(xs, ∇softmax(Δ, data(xs))) +conv2d(x::TrackedArray{<:Any,4}, w::TrackedArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) +conv2d(x::AbstractArray{<:Any,4}, w::TrackedArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) +conv2d(x::TrackedArray{<:Any,4}, w::AbstractArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) + +function back(::typeof(conv2d), Δ, x, w) + @back(x, NNlib.conv2d_grad_x(data(x), data(w), Δ)) + @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ)) +end + # Broadcasting using ForwardDiff: Dual, partials diff --git a/src/tracker/numeric.jl b/src/tracker/numeric.jl index 68211aa3..cbcd3ad8 100644 --- a/src/tracker/numeric.jl +++ b/src/tracker/numeric.jl @@ -19,4 +19,4 @@ function ngradient(f, xs::AbstractArray...) return grads end -gradcheck(f, xs...) = all(isapprox.(ngradient(f, xs...), gradient(f, xs...), rtol = 1e-6)) +gradcheck(f, xs...) = all(isapprox.(ngradient(f, xs...), gradient(f, xs...), rtol = 1e-5)) diff --git a/test/tracker.jl b/test/tracker.jl index ac031915..a3d9563b 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -1,5 +1,6 @@ using Flux.Tracker, Base.Test, NNlib using Flux.Tracker: gradcheck +using NNlib gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(f(xs...)), xs...) gradtest(f, dims...) = gradtest(f, rand.(dims)...) 
@@ -45,4 +46,6 @@ end 2y + x end +@test gradtest(conv2d, rand(10, 10, 3, 2), randn(2, 2, 3, 2)) + end #testset From 0bf22dfb8ea37332543867dc00431e22313f61b5 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 02:29:14 +0000 Subject: [PATCH 02/10] pool gradients --- src/tracker/back.jl | 9 +++++---- src/tracker/lib.jl | 10 +++++++++- test/tracker.jl | 2 ++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index 39810069..d6a48409 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -12,16 +12,17 @@ function scan(x::TrackedArray) return end -back(c::Call, Δ) = back(c.func, Δ, c.args...) -back(::Call{Void}, Δ) = nothing +back_(f, y, args...) = back(f, args...) +back_(c::Call, y, Δ) = back_(c.func, y, Δ, c.args...) +back_(::Call{Void}, y, Δ) = nothing function back(x::TrackedArray, Δ) ref = x.ref -= 1 if isdefined(x, :grad) x.grad .+= Δ - ref == 0 && back(x.f, x.grad) + ref == 0 && back_(x.f, x.data, x.grad) else - ref == 0 && back(x.f, Δ) + ref == 0 && back_(x.f, x.data, Δ) end return end diff --git a/src/tracker/lib.jl b/src/tracker/lib.jl index cac8d7d1..57474933 100644 --- a/src/tracker/lib.jl +++ b/src/tracker/lib.jl @@ -124,7 +124,7 @@ end # NNlib using NNlib -import NNlib: softmax, ∇softmax, conv2d +import NNlib: softmax, ∇softmax, conv2d, pool softmax(xs::TrackedArray) = TrackedArray(Call(softmax, xs)) @@ -139,6 +139,14 @@ function back(::typeof(conv2d), Δ, x, w) @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ)) end +_pool(x, k, mode) = pool(x, window = k, mode = mode) + +pool(x::TrackedArray{<:Any,4}; window = 2, mode = 0) = + TrackedArray(Call(_pool, x, window, mode)) + +back_(::typeof(_pool), y, Δ, x, k, mode) = + back(x, NNlib.pool_grad(data(x), y, Δ, window = k, mode = mode)) + # Broadcasting using ForwardDiff: Dual, partials diff --git a/test/tracker.jl b/test/tracker.jl index a3d9563b..dc11420b 100644 --- a/test/tracker.jl +++ b/test/tracker.jl @@ -47,5 +47,7 @@ end end @test 
gradtest(conv2d, rand(10, 10, 3, 2), randn(2, 2, 3, 2)) +@test gradtest(x -> maxpool2d(x, 2), rand(10, 10, 3, 2)) +@test gradtest(x -> avgpool2d(x, 2), rand(10, 10, 3, 2)) end #testset From 9d0dd9fb7e2a49c23783ed6b47a10eed97865f74 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 13:22:57 +0000 Subject: [PATCH 03/10] layer wip --- src/Flux.jl | 6 ++++-- src/layers/conv.jl | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 src/layers/conv.jl diff --git a/src/Flux.jl b/src/Flux.jl index 526d6bb8..2acdb177 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -7,13 +7,14 @@ module Flux using Juno, Requires using Lazy: @forward -export Chain, Dense, RNN, LSTM, +export Chain, Dense, RNN, LSTM, Conv2D, Dropout, LayerNorm, BatchNorm, SGD, ADAM, Momentum, Nesterov, AMSGrad, param, params, mapleaves using NNlib -export σ, sigmoid, relu, leakyrelu, elu, swish, softmax +export σ, sigmoid, relu, leakyrelu, elu, swish, softmax, + conv2d, maxpool2d, avgpool2d include("tracker/Tracker.jl") using .Tracker @@ -27,6 +28,7 @@ include("treelike.jl") include("layers/stateless.jl") include("layers/basic.jl") +include("layers/conv.jl") include("layers/recurrent.jl") include("layers/normalisation.jl") diff --git a/src/layers/conv.jl b/src/layers/conv.jl new file mode 100644 index 00000000..f7ca6f02 --- /dev/null +++ b/src/layers/conv.jl @@ -0,0 +1,14 @@ +struct Conv2D{F,A} + σ::F + weight::A + stride::Int +end + +Conv2D(k::NTuple{2,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = initn, stride = 1) = + Conv2D(σ, param(initn(k..., ch...)), stride) + +Flux.treelike(Conv2D) + +# (c::Conv2D)(x) = c.σ.(conv2d(x, c.weight, stride = c.stride)) +(c::Conv2D)(x) = c.σ.(conv2d(x, c.weight)) From 9b833a434525bc7afc00dd95c3799b71784f84d1 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 16:17:39 +0000 Subject: [PATCH 04/10] more onehot indexing --- src/onehot.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/src/onehot.jl b/src/onehot.jl index f94fb93e..4f121958 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -18,7 +18,9 @@ end Base.size(xs::OneHotMatrix) = (Int64(xs.height),length(xs.data)) -Base.getindex(xs::OneHotMatrix, i::Int, j::Int) = xs.data[j][i] +Base.getindex(xs::OneHotMatrix, i::Integer, j::Integer) = xs.data[j][i] +Base.getindex(xs::OneHotMatrix, ::Colon, i::Integer) = xs.data[i] +Base.getindex(xs::OneHotMatrix, ::Colon, i::AbstractArray) = OneHotMatrix(xs.height, xs.data[i]) A::AbstractMatrix * B::OneHotMatrix = A[:, map(x->x.ix, B.data)] From 6890a615879464aa0a2f4efdee9cb4406eb14e9f Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 16:17:45 +0000 Subject: [PATCH 05/10] todo --- src/tracker/back.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tracker/back.jl b/src/tracker/back.jl index d6a48409..b4cd27c6 100644 --- a/src/tracker/back.jl +++ b/src/tracker/back.jl @@ -36,6 +36,9 @@ end # Interface methods +# TODO: if an error occurs in `back` the refcounts will be broken +# and `back` will silently fail to update. + function back!(x::TrackedArray, Δ) scan(x) back(x, Δ) From 73ae25289d9b902fab998de686b31a4005ea2858 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 16:18:01 +0000 Subject: [PATCH 06/10] remove old util --- src/utils.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index afe926d9..bba3e416 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -4,8 +4,6 @@ initn(dims...) = randn(dims...)/100 glorot_uniform(dims...) = (rand(dims...) - 0.5)*sqrt(24.0/(sum(dims))) glorot_normal(dims...) = (randn(dims...)*sqrt(2.0/sum(dims))) -flatten(xs) = reshape(xs, size(xs, 1), :) - unsqueeze(xs, dim) = reshape(xs, (size(xs)[1:dim-1]..., 1, size(xs)[dim:end]...)) stack(xs, dim) = cat(dim, unsqueeze.(xs, dim)...) 
From 386eafc44393c4ad2d2c9a60438d6355e1702760 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 16:18:16 +0000 Subject: [PATCH 07/10] reshape --- src/tracker/lib.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/tracker/lib.jl b/src/tracker/lib.jl index 57474933..71d93e88 100644 --- a/src/tracker/lib.jl +++ b/src/tracker/lib.jl @@ -44,6 +44,12 @@ function back(::typeof(vcat), Δ, xs, ys) @back(ys, Δ[size(xs,1)+1:end, i...]) end +Base.reshape(xs::TrackedArray, dims::Union{Colon,Int64}...) = + TrackedArray(Call(reshape, xs, dims...)) + +back(::typeof(reshape), Δ, xs::TrackedArray, _...) = + back(xs, reshape(Δ, size(xs))) + # Reductions Base.sum(xs::TrackedArray, dim) = TrackedArray(Call(sum, xs, dim)) From 51f93d9f0e0d3393da4adcde58ba4eb7e12225b0 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 15 Dec 2017 16:24:45 +0000 Subject: [PATCH 08/10] conv polish --- src/layers/conv.jl | 10 ++++++++-- src/tracker/lib.jl | 17 +++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f7ca6f02..82d90029 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -10,5 +10,11 @@ Conv2D(k::NTuple{2,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; Flux.treelike(Conv2D) -# (c::Conv2D)(x) = c.σ.(conv2d(x, c.weight, stride = c.stride)) -(c::Conv2D)(x) = c.σ.(conv2d(x, c.weight)) +(c::Conv2D)(x) = c.σ.(conv2d(x, c.weight, stride = c.stride)) + +function Base.show(io::IO, l::Conv2D) + print(io, "Conv2D((", size(l.weight, 1), ", ", size(l.weight, 2), ")") + print(io, ", ", size(l.weight, 3), "=>", size(l.weight, 4)) + l.σ == identity || print(io, ", ", l.σ) + print(io, ")") +end diff --git a/src/tracker/lib.jl b/src/tracker/lib.jl index 71d93e88..580992ef 100644 --- a/src/tracker/lib.jl +++ b/src/tracker/lib.jl @@ -136,13 +136,18 @@ softmax(xs::TrackedArray) = TrackedArray(Call(softmax, xs)) back(::typeof(softmax), Δ, xs) = @back(xs, ∇softmax(Δ, data(xs))) 
-conv2d(x::TrackedArray{<:Any,4}, w::TrackedArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) -conv2d(x::AbstractArray{<:Any,4}, w::TrackedArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) -conv2d(x::TrackedArray{<:Any,4}, w::AbstractArray{<:Any,4}) = TrackedArray(Call(conv2d, x, w)) +_conv2d(x, w, stride) = conv2d(x, w, stride = stride) -function back(::typeof(conv2d), Δ, x, w) - @back(x, NNlib.conv2d_grad_x(data(x), data(w), Δ)) - @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ)) +conv2d(x::TrackedArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1) = + TrackedArray(Call(_conv2d, x, w, stride)) +conv2d(x::AbstractArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1) = + TrackedArray(Call(_conv2d, x, w, stride)) +conv2d(x::TrackedArray{<:Any,4}, w::AbstractArray{<:Any,4}; stride = 1) = + TrackedArray(Call(_conv2d, x, w, stride)) + +function back(::typeof(_conv2d), Δ, x, w, stride) + @back(x, NNlib.conv2d_grad_x(data(x), data(w), Δ; stride = stride)) + @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ; stride = stride)) end _pool(x, k, mode) = pool(x, window = k, mode = mode) From 269d8f36b9a766301197d0176a405acdd841b890 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Mon, 18 Dec 2017 18:05:38 +0000 Subject: [PATCH 09/10] conv padding --- src/layers/conv.jl | 7 ++++--- src/tracker/lib.jl | 21 +++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 82d90029..d73d1ad9 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -2,15 +2,16 @@ struct Conv2D{F,A} σ::F weight::A stride::Int + pad::Int end Conv2D(k::NTuple{2,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = initn, stride = 1) = - Conv2D(σ, param(initn(k..., ch...)), stride) + init = initn, stride = 1, pad = 0) = + Conv2D(σ, param(initn(k..., ch...)), stride, pad) Flux.treelike(Conv2D) -(c::Conv2D)(x) = c.σ.(conv2d(x, c.weight, stride = c.stride)) +(c::Conv2D)(x) = c.σ.(conv2d(x, c.weight, stride = c.stride, 
padding = c.pad)) function Base.show(io::IO, l::Conv2D) print(io, "Conv2D((", size(l.weight, 1), ", ", size(l.weight, 2), ")") diff --git a/src/tracker/lib.jl b/src/tracker/lib.jl index 580992ef..2dc25e52 100644 --- a/src/tracker/lib.jl +++ b/src/tracker/lib.jl @@ -136,18 +136,19 @@ softmax(xs::TrackedArray) = TrackedArray(Call(softmax, xs)) back(::typeof(softmax), Δ, xs) = @back(xs, ∇softmax(Δ, data(xs))) -_conv2d(x, w, stride) = conv2d(x, w, stride = stride) +# TODO: can store kwargs efficiently in namedtuples +_conv2d(x, w, stride, pad) = conv2d(x, w, stride = stride, padding = pad) -conv2d(x::TrackedArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1) = - TrackedArray(Call(_conv2d, x, w, stride)) -conv2d(x::AbstractArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1) = - TrackedArray(Call(_conv2d, x, w, stride)) -conv2d(x::TrackedArray{<:Any,4}, w::AbstractArray{<:Any,4}; stride = 1) = - TrackedArray(Call(_conv2d, x, w, stride)) +conv2d(x::TrackedArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1, padding = 0) = + TrackedArray(Call(_conv2d, x, w, stride, padding)) +conv2d(x::AbstractArray{<:Any,4}, w::TrackedArray{<:Any,4}; stride = 1, padding = 0) = + TrackedArray(Call(_conv2d, x, w, stride, padding)) +conv2d(x::TrackedArray{<:Any,4}, w::AbstractArray{<:Any,4}; stride = 1, padding = 0) = + TrackedArray(Call(_conv2d, x, w, stride, padding)) -function back(::typeof(_conv2d), Δ, x, w, stride) - @back(x, NNlib.conv2d_grad_x(data(x), data(w), Δ; stride = stride)) - @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ; stride = stride)) +function back(::typeof(_conv2d), Δ, x, w, stride, pad) + @back(x, NNlib.conv2d_grad_x(data(x), data(w), Δ; stride = stride, padding = pad)) + @back(w, NNlib.conv2d_grad_w(data(x), data(w), Δ; stride = stride, padding = pad)) end _pool(x, k, mode) = pool(x, window = k, mode = mode) From e3577d759cf2a3b3070333275ffabb3dd5b1a566 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Mon, 18 Dec 2017 18:05:48 +0000 Subject: [PATCH 10/10] 
conv docs --- docs/src/models/layers.md | 1 + src/layers/conv.jl | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index d92388e1..cb0c6615 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -5,6 +5,7 @@ These core layers form the foundation of almost all neural networks. ```@docs Chain Dense +Conv2D ``` ## Recurrent Layers diff --git a/src/layers/conv.jl b/src/layers/conv.jl index d73d1ad9..e267510b 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,3 +1,15 @@ +""" + Conv2D(size, in=>out) + Conv2D(size, in=>out, relu) + +Standard convolutional layer. `size` should be a tuple like `(2, 2)`. +`in` and `out` specify the number of input and output channels respectively. + +Data should be stored in HWCN order. In other words, a 100×100 RGB image would +be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. + +Takes the keyword arguments `pad` and `stride`. +""" struct Conv2D{F,A} σ::F weight::A