From cca21a617ce13081942bfa95afef42cb55ccdb11 Mon Sep 17 00:00:00 2001 From: ylxdzsw Date: Mon, 12 Jun 2017 19:39:34 +0800 Subject: [PATCH 1/3] training julia models --- src/core.jl | 4 ++-- src/layers/affine.jl | 13 +++++++++++++ src/layers/control.jl | 18 +++++++++++++++++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/core.jl b/src/core.jl index d3953849..66e33440 100644 --- a/src/core.jl +++ b/src/core.jl @@ -6,11 +6,11 @@ module FluxCore """ back!(model, ΔY, X...) => ΔX -Backpropagate the gradient `ΔY` through the model `m`, accumulating the +Backpropagate the gradient `ΔY` through the model `model`, accumulating the gradients of any parameters. Returns the gradient of the input `X`. Gradients may be arrays or tuples of arrays (for multiple inputs/outputs). """ -back!(model, Δ, xs...) = error("Backprop not implemented for $(typeof(m))") +back!(model, Δ, xs...) = error("Backprop not implemented for $(typeof(model))") """ update!(model, η) => m diff --git a/src/layers/affine.jl b/src/layers/affine.jl index 9608efcc..a1df4562 100644 --- a/src/layers/affine.jl +++ b/src/layers/affine.jl @@ -9,3 +9,16 @@ Affine(in::Integer, out::Integer; init = initn) = inferred(::Type{Affine}, in::Tuple{Dims{2}}, out::Integer) = Affine(in[1][2], out) + +function back!(m::Affine, Δ, x) + W, b = m.W, m.b + W.Δx[:] = x' * Δ + b.Δx[:] = sum(Δ, 1) + Δ * W.x' +end + +function update!(m::Affine, η) + update!(m.W, η) + update!(m.b, η) + m +end \ No newline at end of file diff --git a/src/layers/control.jl b/src/layers/control.jl index 7851f902..a08cb3cb 100644 --- a/src/layers/control.jl +++ b/src/layers/control.jl @@ -7,9 +7,25 @@ end @forward Chain.layers Base.start, Base.next, Base.done (s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers) -back!(s::Chain, Δ) = foldr((m, Δ) -> back!(m, Δ), Δ, s.layers) update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers) +function back!(s::Chain, Δ, xs...) + crumbs = Tuple[xs] + N = length(s.layers) + + for i in 1:N-1 + xs = s.layers[i](xs...) + xs isa Tuple || (xs = (xs, )) + push!(crumbs, xs) + end + + for i in N:-1:1 + Δ = back!(s.layers[i], Δ, crumbs[i]...) + end + + Δ +end + graph(s::Chain) = foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers) From c9ae2196137082bbdb76dca00dbb001cd9422469 Mon Sep 17 00:00:00 2001 From: ylxdzsw Date: Wed, 14 Jun 2017 21:58:37 +0800 Subject: [PATCH 2/3] simplify `back!` of `Chain` --- src/layers/affine.jl | 2 +- src/layers/control.jl | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/layers/affine.jl b/src/layers/affine.jl index a1df4562..ca79c004 100644 --- a/src/layers/affine.jl +++ b/src/layers/affine.jl @@ -21,4 +21,4 @@ function update!(m::Affine, η) update!(m.W, η) update!(m.b, η) m -end \ No newline at end of file +end diff --git a/src/layers/control.jl b/src/layers/control.jl index a08cb3cb..d0c5e61b 100644 --- a/src/layers/control.jl +++ b/src/layers/control.jl @@ -9,21 +9,15 @@ end (s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers) update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers) -function back!(s::Chain, Δ, xs...) - crumbs = Tuple[xs] - N = length(s.layers) - - for i in 1:N-1 - xs = s.layers[i](xs...) - xs isa Tuple || (xs = (xs, )) - push!(crumbs, xs) +function back!(s::Chain, Δ, x) + crumbs = foldl([x], s.layers[1:end-1]) do crumbs, layer + push!(crumbs, layer(crumbs[end])) end - for i in N:-1:1 - Δ = back!(s.layers[i], Δ, crumbs[i]...) 
+ foldr(Δ, collect(zip(crumbs, s.layers))) do pack, Δ + x, layer = pack + back!(layer, Δ, x) end - - Δ end graph(s::Chain) = From f64dca2df6508df30c03a2e1709dea61e0f11071 Mon Sep 17 00:00:00 2001 From: ylxdzsw Date: Mon, 26 Jun 2017 17:21:17 +0800 Subject: [PATCH 3/3] add test for optimizers --- test/optimizer.jl | 38 ++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 2 ++ 2 files changed, 40 insertions(+) create mode 100644 test/optimizer.jl diff --git a/test/optimizer.jl b/test/optimizer.jl new file mode 100644 index 00000000..57f1d011 --- /dev/null +++ b/test/optimizer.jl @@ -0,0 +1,38 @@ +@testset "training julia models" begin + + @testset "linear regression" begin + srand(0) + + model = Affine(10, 1) + + truth = Float32[0, 4, 2, 2, -3, 6, -1, 3, 2, -5]' + + data = map(1:256) do i + x = rand(Float32, 10) + x, truth * x + 3rand(Float32) + end + + Flux.train!(model, data, epoch=5) + + @test cor(reshape.((model.W.x, truth), 10)...) > .99 + end + + @testset "logistic regression" begin + srand(0) + + model = Chain(Affine(10, 1), σ) + + truth = Float32[0, 4, 2, 2, -3, 6, -1, 3, 2, -5]' + + data = map(1:256) do i + x = rand(Float32, 10) + x, truth * x + 2rand(Float32) > 5f0 + end + + Flux.train!(model, data, epoch=10) + + @test cor(reshape.((model.layers[1].W.x, truth), 10)...) > .99 + end + +end + diff --git a/test/runtests.jl b/test/runtests.jl index 8dd1dd8e..1e4981f7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,5 +15,7 @@ include("backend/common.jl") include("basic.jl") include("recurrent.jl") +include("optimizer.jl") + @tfonly include("backend/tensorflow.jl") @mxonly include("backend/mxnet.jl")
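
Reviewer note on the series: the three patches implement, in order, backprop and updates for `Affine`, backprop for `Chain`, and tests that train plain-Julia models end to end. The sketches below restate each step as self-contained, runnable Julia. They are written against current Julia syntax (`sum(Δ, dims = 1)`, keyword `init` for `foldl`/`foldr`), whereas the patches target the 0.6-era API; all `Toy*` names are hypothetical stand-ins for this note, not Flux's actual types, and `ToyAffine` keeps its gradients in plain fields rather than Flux's `Param` wrapper (whose `x`/`Δx` fields the patch writes through).

    # Toy stand-in for the Affine layer of PATCH 1/3: parameters and their
    # gradient slots live directly on the struct.
    struct ToyAffine
        W::Matrix{Float64}
        b::Matrix{Float64}
        ΔW::Matrix{Float64}
        Δb::Matrix{Float64}
    end

    ToyAffine(in, out) =
        ToyAffine(randn(in, out), zeros(1, out), zeros(in, out), zeros(1, out))

    # Forward pass: rows of x are samples, so the output is x*W .+ b.
    (m::ToyAffine)(x) = x * m.W .+ m.b

    # Same contract as the patch's back!(m::Affine, Δ, x): fill the parameter
    # gradients in place and return the gradient with respect to the input.
    function back!(m::ToyAffine, Δ, x)
        m.ΔW[:] = x' * Δ
        m.Δb[:] = sum(Δ, dims = 1)   # the patch writes sum(Δ, 1) on Julia 0.6
        Δ * m.W'
    end

    # Plain gradient-descent step, mirroring the patch's update!(m::Affine, η).
    function update!(m::ToyAffine, η)
        m.W .-= η .* m.ΔW
        m.b .-= η .* m.Δb
        m
    end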
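PATCH 2/3 replaces the index loops in `Chain`'s `back!` with two folds: `foldl` replays the forward pass while recording each layer's input (the "crumbs"), and `foldr` then threads Δ back through the (input, layer) pairs, last layer first. The same shape on the toy types above:

    struct ToyChain
        layers::Vector{ToyAffine}
    end

    # Forward pass threads x through the layers, as in control.jl.
    (s::ToyChain)(x) = foldl((x, m) -> m(x), s.layers; init = x)

    function back!(s::ToyChain, Δ, x)
        # crumbs[i] is the input that layers[i] saw on the way forward.
        crumbs = foldl(s.layers[1:end-1]; init = [x]) do crumbs, layer
            push!(crumbs, layer(crumbs[end]))
        end
        # foldr starts at the last (input, layer) pair, so Δ flows
        # output-to-input; the final value is the gradient at the chain input.
        foldr(collect(zip(crumbs, s.layers)); init = Δ) do pack, Δ
            x, layer = pack
            back!(layer, Δ, x)
        end
    end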
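PATCH 3/3 exercises all of this through `Flux.train!(model, data, epoch = n)`, whose internals the series does not show. To close the loop, a hypothetical `toy_train!` (its name, signature, and squared-error loss are this note's assumptions, not `Flux.train!`'s real API) that strings `back!` and `update!` together the way the linear-regression testset implies:

    # Update a chain by updating each layer, as in the patch's
    # update!(s::Chain, η).
    update!(s::ToyChain, η) = (foreach(l -> update!(l, η), s.layers); s)

    function toy_train!(model, data; epoch = 1, η = 0.01)
        for _ in 1:epoch, (x, y) in data
            Δ = 2 .* (model(x) .- y)   # gradient of sum(abs2, ŷ .- y) in ŷ
            back!(model, Δ, x)         # fills every layer's ΔW/Δb
            update!(model, η)          # SGD step
        end
        model
    end

    # Usage, shaped like the "linear regression" testset: fit a known
    # weight vector from 256 random samples over 5 epochs.
    w = randn(10, 1)
    data = [(x, x * w) for x in (randn(1, 10) for _ in 1:256)]
    model = toy_train!(ToyChain([ToyAffine(10, 1)]), data, epoch = 5)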