From dd82edd818017abad87452d989ec2f8dfa99f25b Mon Sep 17 00:00:00 2001 From: ylxdzsw Date: Fri, 21 Jul 2017 11:22:40 +0800 Subject: [PATCH 1/2] fix training on the tensorflow backend --- src/backend/tensorflow/model.jl | 48 ++++++++++++++-------------- src/backend/tensorflow/tensorflow.jl | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/backend/tensorflow/model.jl b/src/backend/tensorflow/model.jl index 836861a0..b6db200b 100644 --- a/src/backend/tensorflow/model.jl +++ b/src/backend/tensorflow/model.jl @@ -4,18 +4,20 @@ struct Exec session ::Session input ::Any output ::Any - grads ::Any - params ::Dict{Flux.Param,Tensor} + params ::Dict{Param,Param{Tensor}} stacks ::Dict{Any,Any} end function makesession(model, inputs; session = Session(Graph())) inputs = mapt(_ -> placeholder(Float32), inputs) params, stacks, output = tograph(model, inputs...) - # grads = gradients(output, [collectt(inputs)..., values(params)...]) - grads = placeholder(Float32) + output = mapt(x->Param{Tensor}(x, placeholder(Float32)), output) + params = Dict(x=>Param{Tensor}(y, gradients(mapt(x->x.x, output), + y, mapt(x->x.Δx, output))) for (x, y) in params) + inputs = mapt(x->Param{Tensor}(x, gradients(mapt(x->x.x, output), + x, mapt(x->x.Δx, output))), inputs) run(session, global_variables_initializer()) - Exec(session, inputs, output, grads, params, stacks) + Exec(session, inputs, output, params, stacks) end retuple(xs) = xs @@ -23,35 +25,33 @@ retuple(xs::AbstractArray{<:AbstractArray}) = (retuple.(xs)...,) dictt(xs, ys) = Dict(zip(collectt(xs), collectt(ys))) -function params(m::Exec, args...) - shapecheckt(m.input, args) - idict = dictt(m.input, args) - pdict = Dict(t => p.x for (p, t) in m.params) - merge(idict, pdict) -end - function (m::Exec)(args...) - retuple(run(m.session, m.output, params(m, args...))) + dict = merge( + Dict(y.x=>x.x for (x, y) in m.params), + Dict(x.x=>y for (x, y) in zip(m.input, args)) + ) + retuple(run(m.session, mapt(x->x.x, m.output), dict)) end -pullt!(_, xs) = shift!(xs) -pullt!(x::Tuple, xs) = map(x -> pullt!(x, xs), x) - -# TODO: gradients don't work yet -# `gradients` lacks support for `grad_y`s and multiple `y`s - function Flux.back!(m::Exec, Δ, args...) - Δps = run(m.session, m.grads, params(m, args...)) - Δin = pullt!(m.input, Δps) + dict = merge( + Dict(y.x=>x.x for (x, y) in m.params), + Dict(x.x=>y for (x, y) in zip(m.input, args)), + Dict(x.Δx=>y for (x, y) in zip(collectt(m.output), collectt(Δ))) + ) + + Δin, Δps = run(m.session, (mapt(x->x.Δx, m.input), map(x->x.Δx, values(m.params))), dict) + for (p, Δ) in zip(keys(m.params), Δps) p.Δx .+= Δ end + Δin end function Flux.update!(m::Exec, η) for p in keys(m.params) - update!(p, η) + Flux.update!(p, η) end return m end @@ -70,8 +70,8 @@ function (m::Model)(args...) @tferr m.exec.stacks m.exec(args...) end -Flux.back!(m::Model, Δ, args...) = back!(m.exec, Δ, args...) -Flux.update!(m::Model, η) = (update!(m.exec, η); m) +Flux.back!(m::Model, Δ, args...) = Flux.back!(m.exec, Δ, args...) +Flux.update!(m::Model, η) = (Flux.update!(m.exec, η); m) # Recurrent Models diff --git a/src/backend/tensorflow/tensorflow.jl b/src/backend/tensorflow/tensorflow.jl index 74c94012..7536bdbc 100644 --- a/src/backend/tensorflow/tensorflow.jl +++ b/src/backend/tensorflow/tensorflow.jl @@ -1,7 +1,7 @@ module TF using ..Flux, DataFlow, TensorFlow, Juno -import Flux: accuracy, convertel +import Flux: accuracy, convertel, Param export tf From 3000c7bbcb33e54a4d2ab59f6f8eaab5b69408b2 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Thu, 27 Jul 2017 20:43:53 +0100 Subject: [PATCH 2/2] style tweak --- src/backend/tensorflow/model.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/tensorflow/model.jl b/src/backend/tensorflow/model.jl index b6db200b..cdb7f5e5 100644 --- a/src/backend/tensorflow/model.jl +++ b/src/backend/tensorflow/model.jl @@ -13,9 +13,11 @@ function makesession(model, inputs; session = Session(Graph())) params, stacks, output = tograph(model, inputs...) output = mapt(x->Param{Tensor}(x, placeholder(Float32)), output) params = Dict(x=>Param{Tensor}(y, gradients(mapt(x->x.x, output), - y, mapt(x->x.Δx, output))) for (x, y) in params) + y, mapt(x->x.Δx, output))) + for (x, y) in params) inputs = mapt(x->Param{Tensor}(x, gradients(mapt(x->x.x, output), - x, mapt(x->x.Δx, output))), inputs) + x, mapt(x->x.Δx, output))), + inputs) run(session, global_variables_initializer()) Exec(session, inputs, output, params, stacks) end