merge conflicts

Dhairya Gandhi 2019-09-24 00:31:44 +05:30
commit 822288d63d
10 changed files with 64 additions and 24 deletions

.gitattributes

@@ -1 +1,2 @@
 paper/* linguist-documentation
+CITATION.bib linguist-detectable=false

Manifest.toml

@@ -46,9 +46,9 @@ version = "0.6.2"

 [[CUDAapi]]
 deps = ["Libdl", "Logging"]
-git-tree-sha1 = "9b2b4b71d6b7f946c9689bb4dea03ff92e3c7091"
+git-tree-sha1 = "e063efb91cfefd7e6afd92c435d01398107a500b"
 uuid = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
-version = "1.1.0"
+version = "1.2.0"

 [[CUDAdrv]]
 deps = ["CUDAapi", "Libdl", "Printf"]

@@ -147,9 +147,15 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

 [[FFTW]]
 deps = ["AbstractFFTs", "BinaryProvider", "Conda", "Libdl", "LinearAlgebra", "Reexport", "Test"]
+<<<<<<< HEAD
 git-tree-sha1 = "03f8776fbdae28c20c0d1d2ae4e090cd1dfcd247"
 uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 version = "1.0.0"
+=======
+git-tree-sha1 = "6c5b420da0b8c12098048561b8d58f81adea506f"
+uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
+version = "1.0.1"
+>>>>>>> upstream/master

 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays"]

@@ -170,9 +176,15 @@ version = "0.10.3"

 [[GPUArrays]]
 deps = ["Adapt", "FFTW", "FillArrays", "LinearAlgebra", "Printf", "Random", "Serialization", "StaticArrays", "Test"]
+<<<<<<< HEAD
 git-tree-sha1 = "b5009ac44b141ded5e6f04c4db83807970f56e91"
 uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 version = "1.0.2"
+=======
+git-tree-sha1 = "77e27264276fe97a7e7fb928bf8999a145abc018"
+uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
+version = "1.0.3"
+>>>>>>> upstream/master

 [[IRTools]]
 deps = ["InteractiveUtils", "MacroTools", "Test"]

@@ -388,7 +400,7 @@ version = "0.8.3"

 [[Zygote]]
 deps = ["DiffRules", "FFTW", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
-git-tree-sha1 = "ce6d7142d665b1e4c71c678fa7db4da3bbc6743f"
+git-tree-sha1 = "38241b40ebd8748bcacad5e6c7ba3ab3cc7a15c9"
 repo-rev = "master"
 repo-url = "https://github.com/FluxML/Zygote.jl.git"
 uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"

@@ -396,6 +408,8 @@ version = "0.3.4"

 [[ZygoteRules]]
 deps = ["MacroTools"]
-git-tree-sha1 = "def5f96ac2895fd9b48435f6b97020979ee0a4c6"
+git-tree-sha1 = "c4c29b30b8ff3be13d4244e78be7df2a42bc54d0"
+repo-rev = "master"
+repo-url = "https://github.com/FluxML/ZygoteRules.jl.git"
 uuid = "700de1a5-db45-46bc-99cf-38207098b444"
-version = "0.1.0"
+version = "0.2.0"

Project.toml

@@ -24,6 +24,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444"

 [compat]
 CUDAapi = "1.1"

docs/src/training/optimisers.md

@@ -5,7 +5,7 @@ Consider a [simple linear regression](../models/basics.md). We create some dummy
 ```julia
 using Flux

-W = rand(2, 5))
+W = rand(2, 5)
 b = rand(2)

 predict(x) = (W * x) .+ b
@@ -15,7 +15,7 @@ x, y = rand(5), rand(2) # Dummy data
 l = loss(x, y) # ~ 3

 θ = Params([W, b])
-grads = Zygote.gradient(() -> loss(x, y), θ)
+grads = gradient(() -> loss(x, y), θ)
 ```

 We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that:
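
The docs page goes on to show that update step. As a minimal sketch of what "one way to do that" looks like with the `grads` object above (η is a hypothetical learning rate, not part of this diff):

```julia
η = 0.1                 # hypothetical learning rate
for p in (W, b)
  p .-= η .* grads[p]   # in-place gradient descent step on each parameter
end
```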

src/Flux.jl

@@ -6,7 +6,7 @@ using Base: tail
 using Zygote, MacroTools, Juno, Reexport, Statistics, Random
 using MacroTools: @forward
 @reexport using NNlib
-using Zygote: Params, @adjoint, gradient, forward
+using Zygote: Params, @adjoint, gradient, pullback
 export gradient

 export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
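
The `forward` → `pullback` change here (and in the test files below) is a pure rename tracking upstream Zygote; the returned pair is still the primal value plus a back function mapping an output cotangent to input gradients. A minimal sketch of that contract:

```julia
using Zygote

# pullback returns the primal value and a closure over the backward pass.
y, back = Zygote.pullback(x -> 3x^2, 2.0)
@assert y == 12.0             # primal: 3 * 2^2
@assert back(1.0) == (12.0,)  # cotangent 1.0 pulled back: d/dx 3x^2 = 6x = 12
```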

src/cuda/curnn.jl

@@ -271,7 +271,8 @@ function desc(rnn)
   return d
 end

-using ..Flux: @adjoint
+import Zygote
+using Zygote: @adjoint

 function (m::CuRNN{T})(h::CuArray{T}, x::CuArray{T}) where T <: Union{Float32,Float64}
   y, h = forward(desc(m), x, h)

@@ -299,15 +300,29 @@ unbroadcast(x::AbstractArray, Δ) =
   length(x) == length(Δ) ? trim(x, Δ) :
     trim(x, sum(Δ, dims = ntuple(i -> size(x, i) == 1 ? i : ndims(Δ)+1, Val(ndims(Δ)))))

+coerce_cuda(x::Union{CuArray,Nothing}) = x
+coerce_cuda(x::Tuple) = coerce_cuda.(x)
+
+coerce_cuda(x) = x .+ CuArrays.fill(0)
+
+function struct_grad!(cx::Zygote.Context, x, x̄)
+  for f in fieldnames(typeof(x))
+    Zygote.accum_param(cx, getfield(x, f), getfield(x̄, f))
+  end
+  dx = Zygote.grad_mut(cx, x)
+  dx[] = Zygote.accum(dx[], x̄)
+  return dx
+end
+
 for RNN in (CuRNN, CuGRU)
   @eval @adjoint function (m::$RNN{T})(h::CuArray{T}, x::CuArray{T}) where T <: Union{Float32,Float64}
     reserve, (y, ho) = forwardTrain(desc(m), x, h)
     (ho, y), function (Δ)
-      dho, dy = Δ
+      dho, dy = coerce_cuda(Δ)
       h_ = hBatch(x, h)
       dx, dh = backwardData(descs[m], y, dy, dho, h_, reserve)
       (dWi, dWh), db = backwardWeights(descs[m], x, h_, y, reserve)
-      dm = Ref{Any}((σ=nothing,Wi=transpose(dWi),Wh=transpose(dWh),b=db,h=nothing))
+      dm = struct_grad!(__context__, m, (σ=nothing,Wi=transpose(dWi),Wh=transpose(dWh),b=db,h=nothing))
       (dm, unbroadcast(h, dh), dx)
     end
   end

@@ -316,13 +331,13 @@ end

 @adjoint function (m::CuLSTM)((h, c)::Tuple{CuArray{T},CuArray{T}}, x::CuArray{T}) where T <: Union{Float32,Float64}
   reserve, (y, ho, co) = forwardTrain(desc(m), x, h, c)
   ((ho, co), y), function (Δ)
-    dhc, dy = Δ
+    dhc, dy = coerce_cuda(Δ)
     dho, dco = dhc === nothing ? (nothing, nothing) : dhc
     h_ = hBatch(x, h)
     c_ = hBatch(x, c)
     dx, dh, dc = backwardData(descs[m], y, dy, dho, dco, h_, c_, reserve)
     (dWi, dWh), db = backwardWeights(descs[m], x, h_, y, reserve)
-    dm = Ref{Any}((Wi=transpose(dWi),Wh=transpose(dWh),b=db,h=nothing,c=nothing))
+    dm = struct_grad!(__context__, m, (Wi=transpose(dWi),Wh=transpose(dWh),b=db,h=nothing,c=nothing))
     (dm, (unbroadcast(h, dh), unbroadcast(c, dc)), dx)
   end
 end
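
In these hunks, `coerce_cuda` guards the incoming cotangent `Δ`, which may be `nothing` or a non-CuArray, by broadcasting it against a zero-dimensional `CuArray`; `struct_grad!` replaces the old `Ref{Any}` wrapper by accumulating the field gradients into the Zygote context, so parameters collected with `params(m)` now see CUDNN RNN weight gradients. A hedged usage sketch (assumes a working CuArrays GPU setup; mirrors the test added further down):

```julia
using Flux

m = RNN(10, 5) |> gpu
x = gpu(rand(Float32, 10))

# Implicit-style gradients now populate entries for the CUDNN RNN weights.
gs = gradient(() -> sum(m(x)), params(m))
gs[m.cell.Wi]   # a weight-gradient CuArray rather than a missing entry
```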

test/cuda/cudnn.jl

@@ -1,5 +1,5 @@
 using Flux, CuArrays, Test
-using Flux: forward
+using Flux: pullback

 @testset "CUDNN BatchNorm" begin
   @testset "4D Input" begin
@@ -8,8 +8,8 @@ using Flux: forward
     cx = gpu(x)
     cm = gpu(m)

-    y, back = forward((m, x) -> m(x), m, x)
-    cy, cback = forward((m, x) -> m(x), cm, cx)
+    y, back = pullback((m, x) -> m(x), m, x)
+    cy, cback = pullback((m, x) -> m(x), cm, cx)

     @test cpu(cy) ≈ y
@@ -28,8 +28,8 @@ using Flux: forward
     cx = gpu(x)
     cm = gpu(m)

-    y, back = forward((m, x) -> m(x), m, x)
-    cy, cback = forward((m, x) -> m(x), cm, cx)
+    y, back = pullback((m, x) -> m(x), m, x)
+    cy, cback = pullback((m, x) -> m(x), cm, cx)

     @test cpu(cy) ≈ y

test/cuda/curnn.jl

@@ -1,5 +1,14 @@
 using Flux, CuArrays, Test
-using Flux: forward
+using Flux: pullback

+@testset for R in [RNN, GRU, LSTM]
+  m = R(10, 5) |> gpu
+  x = gpu(rand(10))
+  (m̄,) = gradient(m -> sum(m(x)), m)
+  Flux.reset!(m)
+  θ = gradient(() -> sum(m(x)), params(m))
+  @test collect(m̄[].cell[].Wi) == collect(θ[m.cell.Wi])
+end
+
 @testset "RNN" begin
   @testset for R in [RNN, GRU, LSTM], batch_size in (1, 5)
@@ -13,8 +22,8 @@ using Flux: forward
     rand(10, batch_size)
     cux = gpu(x)

-    y, back = forward((r, x) -> (r(x)), rnn, x)
-    cuy, cuback = forward((r, x) -> (r(x)), curnn, cux)
+    y, back = pullback((r, x) -> (r(x)), rnn, x)
+    cuy, cuback = pullback((r, x) -> (r(x)), curnn, cux)

     @test y ≈ collect(cuy)
     @test haskey(Flux.CUDA.descs, curnn.cell)
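
The new testset checks that the explicit (structural) gradient `m̄` agrees with the implicit `Params`-based one; `m̄[].cell[]` unwraps the `Ref`-like containers Zygote uses for mutable structs like `Recur`. A CPU analogue, as an illustration only (the `W` field name is an assumption from 2019-era `Dense`):

```julia
using Flux

m = Dense(10, 5)
x = rand(Float32, 10)

m̄ = gradient(m -> sum(m(x)), m)[1]        # explicit: NamedTuple of field gradients
θ = gradient(() -> sum(m(x)), params(m))  # implicit: Grads keyed by parameter arrays

@assert m̄.W ≈ θ[m.W]   # both routes produce the same weight gradient
```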

test/layers/normalisation.jl

@@ -1,7 +1,7 @@
 using Flux, Test, Statistics
-using Zygote: forward
+using Zygote: pullback

-trainmode(f, x...) = forward(f, x...)[1]
+trainmode(f, x...) = pullback(f, x...)[1]
 trainmode(f) = (x...) -> trainmode(f, x...)

 @testset "Dropout" begin

test/layers/stateless.jl

@@ -55,7 +55,7 @@ const ϵ = 1e-7
     y = rand(T, 2)
     ŷ = rand(T, 2)
     for f in (mse, crossentropy, logitcrossentropy)
-      fwd, back = Flux.forward(f, ŷ, y)
+      fwd, back = Flux.pullback(f, ŷ, y)
       @test fwd isa T
       @test eltype(back(one(T))[1]) == T
     end