From 0c265f305a7fd685525f6a1e006d5e4873fe7c8b Mon Sep 17 00:00:00 2001
From: Mike J Innes
Date: Fri, 8 Mar 2019 14:49:28 +0000
Subject: [PATCH] fix most tests

---
 Manifest.toml                |  2 +-
 test/cuda/cuda.jl            |  2 +-
 test/cuda/cudnn.jl           |  3 +--
 test/layers/normalisation.jl | 15 +++++++--------
 test/layers/stateless.jl     |  7 ++++---
 test/optimise.jl             |  7 +++----
 test/tracker.jl              |  2 +-
 test/utils.jl                | 11 +++++------
 8 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 06348d88..e934703f 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -309,7 +309,7 @@ version = "0.8.1"
 
 [[Zygote]]
 deps = ["DiffRules", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions"]
-git-tree-sha1 = "7fcb55117550e1c195a646947135cc9aac1e2afc"
+git-tree-sha1 = "db27148be2365d2fe507f49ada875050b08d8187"
 repo-rev = "master"
 repo-url = "https://github.com/FluxML/Zygote.jl.git"
 uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl
index 86e7f2f3..4310d29b 100644
--- a/test/cuda/cuda.jl
+++ b/test/cuda/cuda.jl
@@ -1,4 +1,4 @@
-using Flux, Flux.Tracker, CuArrays, Test
+using Flux, CuArrays, Test
 using Flux: gpu
 
 @info "Testing GPU Support"
diff --git a/test/cuda/cudnn.jl b/test/cuda/cudnn.jl
index 9a154961..d6183629 100644
--- a/test/cuda/cudnn.jl
+++ b/test/cuda/cudnn.jl
@@ -1,5 +1,4 @@
-using Flux, Flux.Tracker, CuArrays, Test
-using Flux.Tracker: TrackedArray, data
+using Flux, CuArrays, Test
 
 @testset "CUDNN BatchNorm" begin
   @testset "4D Input" begin
diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl
index 8bc3d1cd..7de3e958 100644
--- a/test/layers/normalisation.jl
+++ b/test/layers/normalisation.jl
@@ -1,5 +1,4 @@
 using Flux: testmode!
-using Flux.Tracker: data
 
 @testset "Dropout" begin
   x = [1.,2.,3.]
@@ -29,8 +28,8 @@
 end
 
 @testset "BatchNorm" begin
-  let m = BatchNorm(2), x = param([1 3 5;
-                                   2 4 6])
+  let m = BatchNorm(2), x = [1 3 5;
+                             2 4 6]
 
     @test m.β.data == [0, 0]  # initβ(2)
     @test m.γ.data == [1, 1]  # initγ(2)
@@ -111,7 +110,7 @@ end
   expand_inst = (x, as) -> reshape(repeat(x, outer=[1, as[length(as)]]), as...)
   # begin tests
   let m = InstanceNorm(2), sizes = (3, 2, 2),
-    x = param(reshape(collect(1:prod(sizes)), sizes))
+    x = reshape(collect(1:prod(sizes)), sizes)
 
     @test m.β.data == [0, 0]  # initβ(2)
     @test m.γ.data == [1, 1]  # initγ(2)
@@ -157,7 +156,7 @@ end
   end
   # with activation function
   let m = InstanceNorm(2, sigmoid), sizes = (3, 2, 2),
-    x = param(reshape(collect(1:prod(sizes)), sizes))
+    x = reshape(collect(1:prod(sizes)), sizes)
 
     affine_shape = collect(sizes)
     affine_shape[1] = 1
@@ -173,7 +172,7 @@
   end
 
   let m = InstanceNorm(2), sizes = (2, 4, 1, 2, 3),
-    x = param(reshape(collect(1:prod(sizes)), sizes))
+    x = reshape(collect(1:prod(sizes)), sizes)
     y = reshape(permutedims(x, [3, 1, 2, 4, 5]), :, 2, 3)
     y = reshape(m(y), sizes...)
     @test m(x) == y
@@ -181,7 +180,7 @@
   end
   # check that μ, σ², and the output are the correct size for higher rank tensors
   let m = InstanceNorm(2), sizes = (5, 5, 3, 4, 2, 6),
-    x = param(reshape(collect(1:prod(sizes)), sizes))
+    x = reshape(collect(1:prod(sizes)), sizes)
     y = m(x)
     @test size(m.μ) == (sizes[end - 1], )
     @test size(m.σ²) == (sizes[end - 1], )
@@ -190,7 +189,7 @@
   end
   # show that instance norm is equal to batch norm when channel and batch dims are squashed
   let m_inorm = InstanceNorm(2), m_bnorm = BatchNorm(12), sizes = (5, 5, 3, 4, 2, 6),
-    x = param(reshape(collect(1:prod(sizes)), sizes))
+    x = reshape(collect(1:prod(sizes)), sizes)
 
     @test m_inorm(x) == reshape(m_bnorm(reshape(x, (sizes[1:end - 2]..., :, 1))), sizes)
   end
diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl
index 34abb8cb..745bf22a 100644
--- a/test/layers/stateless.jl
+++ b/test/layers/stateless.jl
@@ -1,6 +1,7 @@
 using Test
 using Flux: onehotbatch, mse, crossentropy, logitcrossentropy,
   σ, binarycrossentropy, logitbinarycrossentropy
+using Zygote
 
 const ϵ = 1e-7
 
@@ -55,9 +56,9 @@ const ϵ = 1e-7
     y = rand(T, 2)
     ŷ = rand(T, 2)
     for f in (mse, crossentropy, logitcrossentropy)
-      fwd, back = Flux.Tracker.forward(mse, ŷ, y)
-      @test typeof(fwd) == Flux.Tracker.TrackedReal{T}
-      @test eltype(back(one(T))[1]) == Flux.Tracker.TrackedReal{T}
+      fwd, back = Zygote.forward(mse, ŷ, y)
+      @test fwd isa T
+      @test eltype(back(one(T))[1]) == T
     end
   end
 end
diff --git a/test/optimise.jl b/test/optimise.jl
index 7741e872..f40567b1 100644
--- a/test/optimise.jl
+++ b/test/optimise.jl
@@ -1,13 +1,12 @@
 using Flux.Optimise
 using Flux.Optimise: runall
-using Flux.Tracker
 using Test
 
 @testset "Optimise" begin
   w = randn(10, 10)
   @testset for opt in [ADAMW(), ADAGrad(0.1), AdaMax(), ADADelta(0.9), AMSGrad(),
                        NADAM(), Descent(0.1), ADAM(), Nesterov(), RMSProp(), Momentum()]
-    w′ = param(randn(10, 10))
+    w′ = randn(10, 10)
     loss(x) = Flux.mse(w*x, w′*x)
     for t = 1: 10^5
       θ = Params([w′])
@@ -21,7 +20,7 @@ end
 @testset "Optimiser" begin
   w = randn(10, 10)
   @testset for Opt in [InvDecay, WeightDecay, ExpDecay]
-    w′ = param(randn(10, 10))
+    w′ = randn(10, 10)
     loss(x) = Flux.mse(w*x, w′*x)
     opt = Optimiser(Opt(), ADAM(0.001))
     for t = 1:10^5
@@ -36,7 +35,7 @@ end
 
 @testset "Training Loop" begin
   i = 0
-  l = param(1)
+  l = 1
 
   Flux.train!(() -> (sleep(0.1); i += 1; l),
               (),
diff --git a/test/tracker.jl b/test/tracker.jl
index 5f3a291f..6e2e61ec 100644
--- a/test/tracker.jl
+++ b/test/tracker.jl
@@ -1,5 +1,5 @@
 using Flux, Test
-using Tracker: gradcheck
+using Zygote: gradcheck
 
 gradtest(f, xs::AbstractArray...) = gradcheck((xs...) -> sum(sin.(f(xs...))), xs...)
 gradtest(f, dims...) = gradtest(f, rand.(Float64, dims)...)
diff --git a/test/utils.jl b/test/utils.jl
index 3e76f04c..3346d4fd 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -87,12 +87,11 @@ end
 @testset "Precision" begin
   m = Chain(Dense(10, 5, relu), Dense(5, 2))
   x = rand(10)
-  @test eltype(m[1].W.data) == Float32
-  @test eltype(m(x).data) == Float32
-  @test eltype(f64(m)(x).data) == Float64
-  @test eltype(f64(m)[1].W.data) == Float64
-  @test eltype(f32(f64(m))[1].W.data) == Float32
-  @test Tracker.isleaf(f32(f64(m))[1].W)
+  @test eltype(m[1].W) == Float32
+  @test eltype(m(x)) == Float32
+  @test eltype(f64(m)(x)) == Float64
+  @test eltype(f64(m)[1].W) == Float64
+  @test eltype(f32(f64(m))[1].W) == Float32
 end
 
 @testset "Stacking" begin