diff --git a/src/backend/tensorflow/graph.jl b/src/backend/tensorflow/graph.jl
index 0822ba9f..7a3734ca 100644
--- a/src/backend/tensorflow/graph.jl
+++ b/src/backend/tensorflow/graph.jl
@@ -76,3 +76,9 @@ function makesession(model, n)
   run(sess, initialize_all_variables())
   sess, params, inputs, output
 end
+
+function storeparams!(sess, params)
+  for (p, t) in params
+    p.x = run(sess, t)
+  end
+end
diff --git a/src/backend/tensorflow/recurrent.jl b/src/backend/tensorflow/recurrent.jl
index 514a8334..a2c8ca4a 100644
--- a/src/backend/tensorflow/recurrent.jl
+++ b/src/backend/tensorflow/recurrent.jl
@@ -47,19 +47,20 @@ end
 (m::SeqModel)(x::Seq) = first(m(batchone(x)))
 
 function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1,
-                     loss = (y, ŷ) -> -reduce_sum(y .* log2(ŷ)),
-                     opt = TensorFlow.train.GradientDescentOptimizer(η))
+                     loss = (y, ŷ) -> -reduce_sum(y .* log(ŷ)),
+                     opt = () -> TensorFlow.train.GradientDescentOptimizer(η))
   batchlen, seqlen = length(first(Xs)), length(first(Xs)[1])
   state = batchone.(m.m.model.state)
+  sess, params, (instates, input), (outstates, output) = makesession(m.m.model)
   Y = placeholder(Float32)
-  Loss = loss(Y, m.m.output[end])/batchlen/seqlen
-  minimize_op = TensorFlow.train.minimize(opt, Loss)
+  Loss = loss(Y, output)/batchlen/seqlen
+  minimize_op = TensorFlow.train.minimize(opt(), Loss)
   for e in 1:epoch
     info("Epoch $e\n")
     @progress for (i, (x, y)) in enumerate(zip(Xs,Ys))
-      out = run(m.m.session, vcat(m.m.output..., Loss, minimize_op),
-                merge(Dict(m.m.inputs[end]=>batchone(x), Y=>batchone(y)),
-                      Dict(zip(m.m.inputs[1:end-1], state))))
+      out = run(sess, vcat(outstates..., output, Loss, minimize_op),
+                merge(Dict(input=>batchone(x), Y=>batchone(y)),
+                      Dict(zip(instates, state))))
       state = out[1:length(state)]
       loss = out[end-1]
       isnan(loss) && error("Loss is NaN")
@@ -67,4 +68,6 @@ function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1,
       (i-1) % 10 == 0 && @show loss
     end
   end
+  storeparams!(sess, params)
+  return
 end
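
Usage note (not part of the patch): a rough sketch of how the updated `Flux.train!` might be called, assuming a `SeqModel` `m` and prepared sequence batches `Xs`/`Ys` exist; the example values are placeholders. The `opt` keyword is now a zero-argument closure, so the optimizer is constructed against the fresh session that `train!` builds via `makesession`, and `storeparams!` copies the trained variable values back into the Julia-side parameters when training ends.

# Sketch only; `m`, `Xs`, `Ys` are assumed to be defined elsewhere.
Flux.train!(m, Xs, Ys;
            epoch = 5,
            loss  = (y, ŷ) -> -reduce_sum(y .* log(ŷ)),
            opt   = () -> TensorFlow.train.GradientDescentOptimizer(0.1))
# After the call returns, storeparams! has already run inside train!,
# so the trained TensorFlow weights are mirrored back into m's parameters.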