Merge pull request #345 from JoshChristie/update-1.0

Fix for 0.7 and 1.0 updates
2018-08-16 16:51:04 +01:00 · 2018-08-16 16:51:04 +01:00 · 4045c322d5
commit 4045c322d5
parent 62d594af43 a3ab1cbb98
22 changed files with 61 additions and 48 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -5,10 +5,15 @@ os:
  # - osx
 julia:
  - 0.7
  - 1.0
  - nightly
 # uncomment the following lines to override the default test script
 # script:
 #   - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
 #   - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)'
 matrix:
  allow_failures:
    - julia: nightly
 after_success:
  - julia -e 'Pkg.add("Documenter")'
  - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))'
--- a/REQUIRE
+++ b/REQUIRE
@ -1,4 +1,4 @@
-julia 0.7-
+julia 0.7
 Juno
 MacroTools 0.3.3
 NNlib
--- a/src/cuda/cudnn.jl
+++ b/src/cuda/cudnn.jl
@ -1,6 +1,8 @@
 using CuArrays.CUDNN: @check, libcudnn, cudnnStatus_t, libcudnn_handle,
  cudnnDataType, TensorDesc, FilterDesc
 using LinearAlgebra 
 mutable struct DropoutDesc
  ptr::Ptr{Nothing}
  states::CuVector{UInt8}
@ -244,14 +246,14 @@ import ..Tracker: TrackedArray
 using CUDAnative
 using CuArrays: @cuindex, cudims
-function copy_transpose!(dst::CuArray, src::CuArray)
+function LinearAlgebra.copy_transpose!(dst::CuArray, src::CuArray)
  function kernel(dst, src)
    I = @cuindex dst
    dst[I...] = src[reverse(I)...]
    return
  end
  blk, thr = cudims(dst)
-  @cuda (blk, thr) kernel(dst, src)
+  @cuda blocks=blk threads=thr kernel(dst, src)
  return dst
 end
--- a/src/data/cmudict.jl
+++ b/src/data/cmudict.jl
@ -14,7 +14,7 @@ function load()
      return
    end
  end
-  info("Downloading CMUDict dataset")
+  @info "Downloading CMUDict dataset"
  mkpath(deps("cmudict"))
  for x in suffixes
    download("$cache_prefix/http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-$version$x",
--- a/src/data/mnist.jl
+++ b/src/data/mnist.jl
@ -14,7 +14,7 @@ function load()
                 "t10k-images-idx3-ubyte",
                 "t10k-labels-idx1-ubyte"]
      isfile(file) && continue
-      info("Downloading MNIST dataset")
+      @info "Downloading MNIST dataset"
      download("https://cache.julialang.org/http://yann.lecun.com/exdb/mnist/$file.gz", "$file.gz")
      open(file, "w") do io
        write(io, GZip.open(read, "$file.gz"))
--- a/src/data/sentiment.jl
+++ b/src/data/sentiment.jl
@ -5,7 +5,7 @@ using ..Data: deps
 function load()
  isfile(deps("sentiment.zip")) || return
-  info("Downloading sentiment treebank dataset")
+  @info "Downloading sentiment treebank dataset"
  download("https://cache.julialang.org/https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip",
           deps("sentiment.zip"))
 end
--- a/src/layers/basic.jl
+++ b/src/layers/basic.jl
@ -21,8 +21,8 @@ struct Chain
  Chain(xs...) = new([xs...])
 end
-@forward Chain.layers Base.getindex, Base.first, Base.last, Base.endof, Base.push!
+@forward Chain.layers Base.getindex, Base.first, Base.last, Base.lastindex, Base.push!
-@forward Chain.layers Base.start, Base.next, Base.done
+@forward Chain.layers Base.iterate
 children(c::Chain) = c.layers
 mapchildren(f, c::Chain) = Chain(f.(c.layers)...)
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@ -1,6 +1,6 @@
 using NNlib: conv
-@generated sub2(::Type{Val{N}}) where N = :(Val{$(N-2)})
+@generated sub2(::Type{Val{N}}) where N = :(Val($(N-2)))
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
@ -32,7 +32,7 @@ Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
 Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = initn,
     stride = 1, pad = 0, dilation = 1) where N =
-  Conv(param(init(k..., ch...)), param(zero(ch[2])), σ,
+  Conv(param(init(k..., ch...)), param(zeros(ch[2])), σ,
       stride = stride, pad = pad, dilation = dilation)
@treelike Conv
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@ -130,13 +130,13 @@ function (BN::BatchNorm)(x)
    ϵ = data(convert(T, BN.ϵ))
    axes = [1:dims-2; dims] # axes to reduce along (all but channels axis)
-    μ = mean(x, axes)
+    μ = mean(x, dims = axes)
-    σ = sqrt.(mean((x .- μ).^2, axes) .+ ϵ)
+    σ = sqrt.(mean((x .- μ).^2, dims = axes) .+ ϵ)
    # update moving mean/std
    mtm = data(convert(T, BN.momentum))
-    BN.μ = (1 - mtm) .* BN.μ .+ mtm .* squeeze(data(μ), (axes...,))
+    BN.μ = (1 - mtm) .* BN.μ .+ mtm .* dropdims(data(μ), dims = (axes...,))
-    BN.σ = (1 - mtm) .* BN.σ .+ mtm .* squeeze(data(σ), (axes...,)) .* m ./ (m - 1)
+    BN.σ = (1 - mtm) .* BN.σ .+ mtm .* dropdims(data(σ), dims = (axes...,)) .* m ./ (m - 1)
  end
  let λ = BN.λ
--- a/src/optimise/Optimise.jl
+++ b/src/optimise/Optimise.jl
@ -9,7 +9,7 @@ struct Param{T}
  Δ::T
 end
-Base.convert(::Type{Param}, x::AbstractArray) = Param(x, zero(x))
+Param(x::AbstractArray) = Param(x, zero(x))
 include("optimisers.jl")
 include("interface.jl")
@ -17,6 +17,7 @@ include("train.jl")
 using Flux.Tracker: TrackedArray
-Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad)
+Param(x::TrackedArray) = Param(x.data, x.grad)
 # Base.convert(::Type{Param}, x::TrackedArray) = Param(x.data, x.grad)
 end
--- a/src/optimise/train.jl
+++ b/src/optimise/train.jl
@ -59,7 +59,7 @@ hello
 """
 macro epochs(n, ex)
  :(@progress for i = 1:$(esc(n))
-      info("Epoch $i")
+      @info "Epoch $i"
      $(esc(ex))
    end)
 end
--- a/src/tracker/Tracker.jl
+++ b/src/tracker/Tracker.jl
@ -77,8 +77,7 @@ include("numeric.jl")
 Hook into gradient backpropagation. `x` is unmodified, but when backpropagating
 `f` will be applied to the incoming gradient. For example, `hook(-, x)` will reverse
-the sign of the gradient applied to `x`.
+the sign of the gradient applied to `x`."""
-"""
 hook(f, x) = istracked(x) ? track(hook, f, x) : x
@grad hook(f, x) = data(x), Δ -> (nothing, f(Δ))
--- a/src/tracker/back.jl
+++ b/src/tracker/back.jl
@ -70,7 +70,7 @@ struct Params
  Params(xs) = new(IdSet(xs))
 end
-@forward Params.params Base.start, Base.next, Base.done
+@forward Params.params Base.iterate, Base.length
 function Base.show(io::IO, ps::Params)
  print(io, "Params([")
@ -86,6 +86,8 @@ Base.show(io::IO, ps::Grads) = println(io, "Grads(...)")
 Grads() = Grads(IdDict())
@forward Grads.grads Base.setindex!, Base.haskey, Base.length, Base.iterate
 Grads(ps::Params) = Grads(IdDict(tracker(p) => init_grad(data(p)) for p in ps))
 Base.getindex(g::Grads, x::Tracked) = g.grads[x]
@ -94,7 +96,6 @@ function Base.getindex(g::Grads, x)
  g[tracker(x)]
 end
@forward Grads.grads Base.setindex!, Base.haskey
 accum!(g::Grads, x, Δ) = g[x] = haskey(g, x) ? g[x] .+ Δ : Δ
@ -136,7 +137,7 @@ end
 function forward(f, args...)
  args = param.(args)
  y, back = forward(() -> f(args...), Params(args))
-  y, Δ -> getindex.(back(Δ), args)
+  y, Δ -> getindex.(Ref(back(Δ)), args)
 end
 function losscheck(x)
--- a/src/tracker/idset.jl
+++ b/src/tracker/idset.jl
@ -20,6 +20,8 @@ Base.similar(s::IdSet, T::Type) = IdSet{T}()
@forward IdSet.dict Base.length
-Base.start(s::IdSet) = start(keys(s.dict))
+function Base.iterate(v::IdSet, state...)
-Base.next(s::IdSet, st) = next(keys(s.dict), st)
+  y = Base.iterate(keys(v.dict), state...)
-Base.done(s::IdSet, st) = done(keys(s.dict), st)
+  y === nothing && return nothing
  return (y[1], y[2])
 end
--- a/test/cuda/cuda.jl
+++ b/test/cuda/cuda.jl
@ -1,7 +1,7 @@
 using Flux, Flux.Tracker, CuArrays, Test
 using Flux: gpu
-info("Testing Flux/GPU")
+@info "Testing Flux/GPU"
@testset "CuArrays" begin
--- a/test/cuda/cudnn.jl
+++ b/test/cuda/cudnn.jl
@ -1,6 +1,6 @@
 using Flux, CuArrays, Test
-info("Testing Flux/CUDNN")
+@info "Testing Flux/CUDNN"
@testset "RNN" begin
  @testset for R in [RNN, GRU, LSTM]
--- a/test/layers/normalisation.jl
+++ b/test/layers/normalisation.jl
@ -53,17 +53,17 @@ end
    #  .1 * 4 + 0 = .4
    @test m.μ ≈ reshape([0.3, 0.4], 2, 1)
-    # julia> .1 .* std(x, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
+    # julia> .1 .* std(x, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
    # 2×1 Array{Float64,2}:
    #  1.14495
    #  1.14495
-    @test m.σ ≈ .1 .* std(x.data, 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
+    @test m.σ ≈ .1 .* std(x.data, dims = 2, corrected=false) .* (3 / 2).+ .9 .* [1., 1.]
    testmode!(m)
    @test !m.active
    x′ = m(x).data
-    @test x′[1] ≈ (1 - 0.3) / 1.1449489742783179
+    @test x′[1] ≈ (1 .- 0.3) / 1.1449489742783179
  end
  # with activation function
--- a/test/layers/stateless.jl
+++ b/test/layers/stateless.jl
@ -42,8 +42,8 @@ const ϵ = 1e-7
  logŷ, y = randn(3), rand(3)
  @testset "binarycrossentropy" begin
-    @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ))
+    @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ))
-    @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(σ.(logŷ)))
+    @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ)))
  end
  @testset "logitbinarycrossentropy" begin
--- a/test/optimise.jl
+++ b/test/optimise.jl
@ -1,6 +1,6 @@
 using Flux.Optimise
 using Flux.Tracker
-
+using Test
@testset "Optimise" begin
  w = randn(10, 10)
  @testset for Opt in [SGD, Nesterov, Momentum, ADAM, AdaMax, RMSProp, ps -> ADAGrad(ps, 0.1), ADADelta, AMSGrad, NADAM]
--- a/test/runtests.jl
+++ b/test/runtests.jl
@ -1,6 +1,7 @@
 using Flux, Test, Random
 using Random
-srand(0)
+Random.seed!(0)
@testset "Flux" begin
@ -11,8 +12,8 @@ include("layers/stateless.jl")
 include("optimise.jl")
 include("data.jl")
-# if Base.find_in_path("CuArrays") ≠ nothing
+if Base.find_package("CuArrays") ≠ nothing
-#   include("cuda/cuda.jl")
+  include("cuda/cuda.jl")
-# end
+end
 end
--- a/test/tracker.jl
+++ b/test/tracker.jl
@ -3,23 +3,20 @@ using Flux.Tracker, Test, NNlib
 using Flux.Tracker: TrackedReal, gradcheck, grad, derivative, checkpoint
 using NNlib: conv
 using Printf: @sprintf
-using LinearAlgebra: diagm, dot, LowerTriangular, norm
+using LinearAlgebra: Diagonal, dot, LowerTriangular, norm
 using Statistics: mean, std
 using Random
 # using StatsBase
 gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...)
-gradtest(f, dims...) = gradtest(f, rand.(dims)...)
+gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...)
@testset "Tracker" begin
@test gradtest((x, W, b) -> σ.(W*x .+ b), 5, (2,5), 2)
@test gradtest((x, W, b) -> σ.(W*x .+ b), (5,3), (2,5), 2)
@test gradtest((x, W, b) -> logσ.(W*x .+ b), 5, (2,5), 2)
@test gradtest((x, W, b) -> logσ.(W*x .+ b), (5,3), (2,5), 2)
@test gradtest((w, x) -> w'*x, randn(Float64,10, 2), randn(Float64,10))
@test gradtest((w, x) -> w*x', randn(Float64,5,5), randn(Float64,5,5))
@test gradtest(x -> sum(x, dims = (2, 3)), (3,4,5))
@test gradtest(x -> sum(x, dims = 1), randn(Float64,2,3))
@test gradtest(x -> sum(x, dims = [1,2]), randn(Float64,2,3))
@ -36,7 +33,6 @@ gradtest(f, dims...) = gradtest(f, rand.(dims)...)
@test gradtest(Flux.crossentropy, rand(5,5), rand(5, 5))
@test gradtest(x -> x', rand(5))
 function promotiontest(f, A, B, C)
  r0 = f(A, B, C)
  r1 = f(param(A), B, C)
@ -69,6 +65,7 @@ end
    @test gradtest(vcatf, rand(5)', rand(2,5))
  end
  @testset for hcatf in [hcat, cat2]
    @test gradtest(hcatf, rand(5), rand(5))
    @test gradtest(hcatf, rand(5)', rand(5)')
@ -97,7 +94,7 @@ end
  @test !isa(vcat(rand(2)), TrackedArray)
  @test !isa(hcat(rand(2)), TrackedArray)
-  @test !isa(cat(1,rand(2)), TrackedArray)
+  @test !isa(cat(rand(2), dims=1), TrackedArray)
  @test gradtest((a,b)->cat(a, b, dims = (2,3,5)), rand(2,3), rand(2,4,2,1))
@ -115,6 +112,7 @@ end
    promotiontest(hcat, rand(4,3,5), rand(4,1,5), rand(4,2,5))
    promotiontest((x...) -> cat(x..., dims = 3), rand(4,5,3), rand(4,5,1), rand(4,5,2))
  end
 end
@test gradtest(x -> permutedims(x, [3,1,2]), rand(4,5,6))
@ -128,7 +126,7 @@ end
@test gradtest(kron, rand(5,1), rand(3,1), rand(8,1))
@test gradtest(kron, rand(5,2), rand(3,2), rand(8,2))
-@test gradtest(diagm, rand(3))
+@test gradtest(f-> Matrix(Diagonal(f)), rand(3))
@testset "mean" begin
  @test gradtest(mean, rand(2, 3))
--- a/test/utils.jl
+++ b/test/utils.jl
@ -1,6 +1,10 @@
-using Flux: throttle, initn, glorot_uniform, glorot_normal, jacobian
+using Flux
 using Flux: throttle, jacobian, initn, glorot_uniform, glorot_normal
 using StatsBase: std
 using Dates
 using Random
 using Test
 using Dates: now
@testset "Throttle" begin
  @testset "default behaviour" begin
@ -61,7 +65,7 @@ end
@testset "Initialization" begin
  # Set random seed so that these tests don't fail randomly
-  srand(0)
+  Random.seed!(0)
  # initn() should yield a kernel with stddev ~= 1e-2
  v = initn(10, 10)
  @test std(v) > 0.9*1e-2