Merge pull request #345 from JoshChristie/update-1.0

Fix for 0.7 and 1.0 updates
This commit is contained in:
Mike J Innes 2018-08-16 16:51:04 +01:00 committed by GitHub
commit 4045c322d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 61 additions and 48 deletions

View File

@ -5,10 +5,15 @@ os:
# - osx # - osx
julia: julia:
- 0.7 - 0.7
- 1.0
- nightly
# uncomment the following lines to override the default test script # uncomment the following lines to override the default test script
# script: # script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi # - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)' # - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)'
matrix:
allow_failures:
- julia: nightly
after_success: after_success:
- julia -e 'Pkg.add("Documenter")' - julia -e 'Pkg.add("Documenter")'
- julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))'

View File

@ -1,4 +1,4 @@
julia 0.7- julia 0.7
Juno Juno
MacroTools 0.3.3 MacroTools 0.3.3
NNlib NNlib

View File

@ -1,6 +1,8 @@
using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle, using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle,
cudnnDataType, TensorDesc, FilterDesc cudnnDataType, TensorDesc, FilterDesc
using LinearAlgebra
mutable struct DropoutDesc mutable struct DropoutDesc
ptr::Ptr{Nothing} ptr::Ptr{Nothing}
states::CuVector{UInt8} states::CuVector{UInt8}
@ -244,14 +246,14 @@ import ..Tracker: TrackedArray
using CUDAnative using CUDAnative
using CuArrays: @cuindex, cudims using CuArrays: @cuindex, cudims
function copy_transpose!(dst::CuArray, src::CuArray) function LinearAlgebra.copy_transpose!(dst::CuArray, src::CuArray)
function kernel(dst, src) function kernel(dst, src)
I = @cuindex dst I = @cuindex dst
dst[I...] = src[reverse(I)...] dst[I...] = src[reverse(I)...]
return return
end end
blk, thr = cudims(dst) blk, thr = cudims(dst)
@cuda (blk, thr) kernel(dst, src) @cuda blocks=blk threads=thr kernel(dst, src)
return dst return dst
end end

View File

@ -14,7 +14,7 @@ function load()
return return
end end
end end
info("Downloading CMUDict dataset") @info "Downloading CMUDict dataset"
mkpath(deps("cmudict")) mkpath(deps("cmudict"))
for x in suffixes for x in suffixes
download("$cache_prefix/http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x", download("$cache_prefix/http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x",

View File

@ -14,7 +14,7 @@ function load()
"t10k-images-idx3-ubyte", "t10k-images-idx3-ubyte",
"t10k-labels-idx1-ubyte"] "t10k-labels-idx1-ubyte"]
isfile(file) && continue isfile(file) && continue
info("Downloading MNIST dataset") @info "Downloading MNIST dataset"
download("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz") download("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz")
open(file, "w") do io open(file, "w") do io
write(io, GZip.open(read, "$file.gz")) write(io, GZip.open(read, "$file.gz"))

View File

@ -5,7 +5,7 @@ using ..Data: deps
function load() function load()
isfile(deps("sentiment.zip")) || return isfile(deps("sentiment.zip")) || return
info("Downloading sentiment treebank dataset") @info "Downloading sentiment treebank dataset"
download("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip", download("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip",
deps("sentiment.zip")) deps("sentiment.zip"))
end end

View File

@ -21,8 +21,8 @@ struct Chain
Chain(xs...) = new([xs...]) Chain(xs...) = new([xs...])
end end
@forward Chain.layers Base.getindex, Base.first, Base.last, Base.endof, Base.push! @forward Chain.layers Base.getindex, Base.first, Base.last, Base.lastindex, Base.push!
@forward Chain.layers Base.start, Base.next, Base.done @forward Chain.layers Base.iterate
children(c::Chain) = c.layers children(c::Chain) = c.layers
mapchildren(f, c::Chain) = Chain(f.(c.layers)...) mapchildren(f, c::Chain) = Chain(f.(c.layers)...)

View File

@ -1,6 +1,6 @@
using NNlib: conv using NNlib: conv
@generated sub2(::Type{Val{N}}) where N = :(Val{$(N-2)}) @generated sub2(::Type{Val{N}}) where N = :(Val($(N-2)))
expand(N, i::Tuple) = i expand(N, i::Tuple) = i
expand(N, i::Integer) = ntuple(_ -> i, N) expand(N, i::Integer) = ntuple(_ -> i, N)
@ -32,7 +32,7 @@ Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn, Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn,
stride = 1, pad = 0, dilation = 1) where N = stride = 1, pad = 0, dilation = 1) where N =
Conv(param(init(k..., ch...)), param(zero(ch[2])), σ, Conv(param(init(k..., ch...)), param(zeros(ch[2])), σ,
stride = stride, pad = pad, dilation = dilation) stride = stride, pad = pad, dilation = dilation)
@treelike Conv @treelike Conv

View File

@ -130,13 +130,13 @@ function (BN::BatchNorm)(x)
ϵ = data(convert(T, BN.ϵ)) ϵ = data(convert(T, BN.ϵ))
axes = [1:dims-2; dims] # axes to reduce along (all but channels axis) axes = [1:dims-2; dims] # axes to reduce along (all but channels axis)
μ = mean(x, axes) μ = mean(x, dims = axes)
σ = sqrt.(mean((x .- μ).^2, axes) .+ ϵ) σ = sqrt.(mean((x .- μ).^2, dims = axes) .+ ϵ)
# update moving mean/std # update moving mean/std
mtm = data(convert(T, BN.momentum)) mtm = data(convert(T, BN.momentum))
BN.μ = (1 - mtm) .* BN.μ .+ mtm .* squeeze(data(μ), (axes...,)) BN.μ = (1 - mtm) .* BN.μ .+ mtm .* dropdims(data(μ), dims = (axes...,))
BN.σ = (1 - mtm) .* BN.σ .+ mtm .* squeeze(data(σ), (axes...,)) .* m ./ (m - 1) BN.σ = (1 - mtm) .* BN.σ .+ mtm .* dropdims(data(σ), dims = (axes...,)) .* m ./ (m - 1)
end end
let λ = BN.λ let λ = BN.λ

View File

@ -9,7 +9,7 @@ struct Param{T}
Δ::T Δ::T
end end
Base.convert(::Type{Param}, x::AbstractArray) = Param(x, zero(x)) Param(x::AbstractArray) = Param(x, zero(x))
include("optimisers.jl") include("optimisers.jl")
include("interface.jl") include("interface.jl")
@ -17,6 +17,7 @@ include("train.jl")
using Flux.Tracker: TrackedArray using Flux.Tracker: TrackedArray
Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad) Param(x::TrackedArray) = Param(x.data, x.grad)
# Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad)
end end

View File

@ -59,7 +59,7 @@ hello
""" """
macro epochs(n, ex) macro epochs(n, ex)
:(@progress for i = 1:$(esc(n)) :(@progress for i = 1:$(esc(n))
info("Epoch $i") @info "Epoch $i"
$(esc(ex)) $(esc(ex))
end) end)
end end

View File

@ -77,8 +77,7 @@ include("numeric.jl")
Hook into gradient backpropagation. `x` is unmodified, but when backpropagating Hook into gradient backpropagation. `x` is unmodified, but when backpropagating
`f` will be applied to the incoming gradient. For example, `hook(-, x)` will reverse `f` will be applied to the incoming gradient. For example, `hook(-, x)` will reverse
the sign of the gradient applied to `x`. the sign of the gradient applied to `x`."""
"""
hook(f, x) = istracked(x) ? track(hook, f, x) : x hook(f, x) = istracked(x) ? track(hook, f, x) : x
@grad hook(f, x) = data(x), Δ -> (nothing, f(Δ)) @grad hook(f, x) = data(x), Δ -> (nothing, f(Δ))

View File

@ -70,7 +70,7 @@ struct Params
Params(xs) = new(IdSet(xs)) Params(xs) = new(IdSet(xs))
end end
@forward Params.params Base.start, Base.next, Base.done @forward Params.params Base.iterate, Base.length
function Base.show(io::IO, ps::Params) function Base.show(io::IO, ps::Params)
print(io, "Params([") print(io, "Params([")
@ -86,6 +86,8 @@ Base.show(io::IO, ps::Grads) = println(io, "Grads(...)")
Grads() = Grads(IdDict()) Grads() = Grads(IdDict())
@forward Grads.grads Base.setindex!, Base.haskey, Base.length, Base.iterate
Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps)) Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps))
Base.getindex(g::Grads, x::Tracked) = g.grads[x] Base.getindex(g::Grads, x::Tracked) = g.grads[x]
@ -94,7 +96,6 @@ function Base.getindex(g::Grads, x)
g[tracker(x)] g[tracker(x)]
end end
@forward Grads.grads Base.setindex!, Base.haskey
accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] .+ Δ : Δ accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] .+ Δ : Δ
@ -136,7 +137,7 @@ end
function forward(f, args...) function forward(f, args...)
args = param.(args) args = param.(args)
y, back = forward(() -> f(args...), Params(args)) y, back = forward(() -> f(args...), Params(args))
y, Δ -> getindex.(back(Δ), args) y, Δ -> getindex.(Ref(back(Δ)), args)
end end
function losscheck(x) function losscheck(x)

View File

@ -20,6 +20,8 @@ Base.similar(s::IdSet, T::Type) = IdSet{T}()
@forward IdSet.dict Base.length @forward IdSet.dict Base.length
Base.start(s::IdSet) = start(keys(s.dict)) function Base.iterate(v::IdSet, state...)
Base.next(s::IdSet, st) = next(keys(s.dict), st) y = Base.iterate(keys(v.dict), state...)
Base.done(s::IdSet, st) = done(keys(s.dict), st) y === nothing && return nothing
return (y[1], y[2])
end

View File

@ -1,7 +1,7 @@
using Flux, Flux.Tracker, CuArrays, Test using Flux, Flux.Tracker, CuArrays, Test
using Flux: gpu using Flux: gpu
info("Testing Flux/GPU") @info "Testing Flux/GPU"
@testset "CuArrays" begin @testset "CuArrays" begin

View File

@ -1,6 +1,6 @@
using Flux, CuArrays, Test using Flux, CuArrays, Test
info("Testing Flux/CUDNN") @info "Testing Flux/CUDNN"
@testset "RNN" begin @testset "RNN" begin
@testset for R in [RNN, GRU, LSTM] @testset for R in [RNN, GRU, LSTM]

View File

@ -53,17 +53,17 @@ end
# .1 * 4 + 0 = .4 # .1 * 4 + 0 = .4
@test m.μ ≈ reshape([0.3, 0.4], 2, 1) @test m.μ ≈ reshape([0.3, 0.4], 2, 1)
# julia> .1 .* std(x, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] # julia> .1 .* std(x, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
# 2×1 Array{Float64,2}: # 2×1 Array{Float64,2}:
# 1.14495 # 1.14495
# 1.14495 # 1.14495
@test m.σ ≈ .1 .* std(x.data, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.] @test m.σ ≈ .1 .* std(x.data, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
testmode!(m) testmode!(m)
@test !m.active @test !m.active
x = m(x).data x = m(x).data
@test x[1] ≈ (1 - 0.3) / 1.1449489742783179 @test x[1] ≈ (1 .- 0.3) / 1.1449489742783179
end end
# with activation function # with activation function

View File

@ -42,8 +42,8 @@ const ϵ = 1e-7
logŷ, y = randn(3), rand(3) logŷ, y = randn(3), rand(3)
@testset "binarycrossentropy" begin @testset "binarycrossentropy" begin
@test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ))
@test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(σ.(logŷ))) @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ)))
end end
@testset "logitbinarycrossentropy" begin @testset "logitbinarycrossentropy" begin

View File

@ -1,6 +1,6 @@
using Flux.Optimise using Flux.Optimise
using Flux.Tracker using Flux.Tracker
using Test
@testset "Optimise" begin @testset "Optimise" begin
w = randn(10, 10) w = randn(10, 10)
@testset for Opt in [SGD, Nesterov, Momentum, ADAM, AdaMax, RMSProp, ps -> ADAGrad(ps, 0.1), ADADelta, AMSGrad, NADAM] @testset for Opt in [SGD, Nesterov, Momentum, ADAM, AdaMax, RMSProp, ps -> ADAGrad(ps, 0.1), ADADelta, AMSGrad, NADAM]

View File

@ -1,6 +1,7 @@
using Flux, Test, Random using Flux, Test, Random
using Random
srand(0) Random.seed!(0)
@testset "Flux" begin @testset "Flux" begin
@ -11,8 +12,8 @@ include("layers/stateless.jl")
include("optimise.jl") include("optimise.jl")
include("data.jl") include("data.jl")
# if Base.find_in_path("CuArrays") ≠ nothing if Base.find_package("CuArrays") != nothing
# include("cuda/cuda.jl") include("cuda/cuda.jl")
# end end
end end

View File

@ -3,23 +3,20 @@ using Flux.Tracker, Test, NNlib
using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint
using NNlib: conv using NNlib: conv
using Printf: @sprintf using Printf: @sprintf
using LinearAlgebra: diagm, dot, LowerTriangular, norm using LinearAlgebra: Diagonal, dot, LowerTriangular, norm
using Statistics: mean, std using Statistics: mean, std
using Random
# using StatsBase # using StatsBase
gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...) gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...)
gradtest(f, dims...) = gradtest(f, rand.(dims)...) gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...)
@testset "Tracker" begin @testset "Tracker" begin
@test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2)
@test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2) @test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2)
@test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2)
@test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2) @test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2)
@test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10)) @test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10))
@test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5)) @test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5))
@test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5)) @test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5))
@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3)) @test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3))
@test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3)) @test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3))
@ -36,7 +33,6 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...)
@test gradtest(Flux.crossentropy, rand(5,5), rand(5, 5)) @test gradtest(Flux.crossentropy, rand(5,5), rand(5, 5))
@test gradtest(x -> x', rand(5)) @test gradtest(x -> x', rand(5))
function promotiontest(f, A, B, C) function promotiontest(f, A, B, C)
r0 = f(A, B, C) r0 = f(A, B, C)
r1 = f(param(A), B, C) r1 = f(param(A), B, C)
@ -69,6 +65,7 @@ end
@test gradtest(vcatf, rand(5)', rand(2,5)) @test gradtest(vcatf, rand(5)', rand(2,5))
end end
@testset for hcatf in [hcat, cat2] @testset for hcatf in [hcat, cat2]
@test gradtest(hcatf, rand(5), rand(5)) @test gradtest(hcatf, rand(5), rand(5))
@test gradtest(hcatf, rand(5)', rand(5)') @test gradtest(hcatf, rand(5)', rand(5)')
@ -97,7 +94,7 @@ end
@test !isa(vcat(rand(2)), TrackedArray) @test !isa(vcat(rand(2)), TrackedArray)
@test !isa(hcat(rand(2)), TrackedArray) @test !isa(hcat(rand(2)), TrackedArray)
@test !isa(cat(1,rand(2)), TrackedArray) @test !isa(cat(rand(2), dims=1), TrackedArray)
@test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1)) @test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1))
@ -115,6 +112,7 @@ end
promotiontest(hcat, rand(4,3,5), rand(4,1,5), rand(4,2,5)) promotiontest(hcat, rand(4,3,5), rand(4,1,5), rand(4,2,5))
promotiontest((x...) -> cat(x..., dims = 3), rand(4,5,3), rand(4,5,1), rand(4,5,2)) promotiontest((x...) -> cat(x..., dims = 3), rand(4,5,3), rand(4,5,1), rand(4,5,2))
end end
end end
@test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6)) @test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6))
@ -128,7 +126,7 @@ end
@test gradtest(kron, rand(5,1), rand(3,1), rand(8,1)) @test gradtest(kron, rand(5,1), rand(3,1), rand(8,1))
@test gradtest(kron, rand(5,2), rand(3,2), rand(8,2)) @test gradtest(kron, rand(5,2), rand(3,2), rand(8,2))
@test gradtest(diagm, rand(3)) @test gradtest(f-> Matrix(Diagonal(f)), rand(3))
@testset "mean" begin @testset "mean" begin
@test gradtest(mean, rand(2, 3)) @test gradtest(mean, rand(2, 3))

View File

@ -1,6 +1,10 @@
using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian using Flux
using Flux: throttle, jacobian, initn, glorot_uniform, glorot_normal
using StatsBase: std using StatsBase: std
using Dates using Dates
using Random
using Test
using Dates: now
@testset "Throttle" begin @testset "Throttle" begin
@testset "default behaviour" begin @testset "default behaviour" begin
@ -61,7 +65,7 @@ end
@testset "Initialization" begin @testset "Initialization" begin
# Set random seed so that these tests don't fail randomly # Set random seed so that these tests don't fail randomly
srand(0) Random.seed!(0)
# initn() should yield a kernel with stddev ~= 1e-2 # initn() should yield a kernel with stddev ~= 1e-2
v = initn(10, 10) v = initn(10, 10)
@test std(v) > 0.9*1e-2 @test std(v) > 0.9*1e-2