diff --git a/src/Flux.jl b/src/Flux.jl
index f8db5553..3d50efc1 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -38,8 +38,6 @@ include("layers/cost.jl")
 include("layers/recurrent.jl")
 include("layers/shims.jl")
 
-include("backend/backend.jl")
-
 include("data.jl")
 include("training.jl")
 
diff --git a/src/backend/backend.jl b/src/backend/backend.jl
deleted file mode 100644
index ebf80ee0..00000000
--- a/src/backend/backend.jl
+++ /dev/null
@@ -1,28 +0,0 @@
-# We use a lazy-loading trick to load the backend code as needed; this avoids
-# the need for a hard dependency on both backends.
-
-# This is effectively equivalent to:
-# include("tensorflow/tensorflow.jl")
-# using .TF
-# export tf
-# but instead of loading immediately, we wait until `tf` is first called.
-
-function loadtf()
-  isdefined(Flux, :TF) && return
-  @eval include(joinpath(dirname($@__FILE__), "tensorflow/tensorflow.jl"))
-end
-
-function tf(args...)
-  loadtf()
-  eval(:(TF.tf($(QuoteNode.(args)...))))
-end
-
-function loadmx()
-  isdefined(Flux, :MX) && return
-  @eval include(joinpath(dirname($@__FILE__), "mxnet/mxnet.jl"))
-end
-
-function mxnet(args...)
-  loadmx()
-  eval(:(MX.mxnet($(QuoteNode.(args)...))))
-end
diff --git a/src/backend/mxnet/graph.jl b/src/backend/mxnet/graph.jl
deleted file mode 100644
index 06062eb8..00000000
--- a/src/backend/mxnet/graph.jl
+++ /dev/null
@@ -1,142 +0,0 @@
-function nodename(s::mx.SymbolicNode)
-  name = Ref{mx.char_p}(0)
-  success = Ref(0)
-  mx.@mxcall(:MXSymbolGetName, (mx.MX_handle, Ref{mx.char_p}, Ref{Int}), s.handle.value, name, success)
-  @assert success[] != -1
-  return Symbol(unsafe_string(name[]))
-end
-
-using Base: @get!
-using DataFlow: Constant, constant
-using DataFlow.Interpreter
-using DataFlow.Interpreter: Exception, totrace
-import Flux: Reshape, MaxPool, flatten, mapt, broadcastto, ∘
-
-# TODO: implement Julia's type promotion rules
-
-node(x::Tuple) = map(node, x)
-node(x::mx.SymbolicNode) = x
-
-graph(::typeof(tuple), args...) = (args...,)
-graph(::typeof(identity), x) = x
-graph(::typeof(*), xs...) = mx.dot(reverse(xs)...) # Work around MXNet shape hack
-graph(::typeof(σ), x) = mx.Activation(x, act_type = :sigmoid)
-graph(::typeof(relu), x) = mx.Activation(x, act_type = :relu)
-graph(::typeof(tanh), x) = mx.Activation(x, act_type = :tanh)
-graph(::typeof(flatten), x) = mx.Flatten(x)
-graph(::typeof(hcat), xs...) = mx.concat(xs..., dim = 2-1)
-graph(::typeof(vec), xs) = reshape(xs, shape = (-1,))
-
-graph(::typeof(broadcast), ::typeof(+), args...) = mx.broadcast_plus(args...)
-graph(::typeof(broadcast), ::typeof(-), args...) = mx.broadcast_sub(args...)
-graph(::typeof(broadcast), ::typeof(*), args...) = mx.broadcast_mul(args...)
-graph(::typeof(broadcast), ::typeof(/), args...) = mx.broadcast_div(args...)
-graph(::typeof(broadcastto), xs, shape) = mx.broadcast_to(xs, shape = map(i -> i≤1?0:i, reverse(shape)))
-# Old broadcasters
-graph(::typeof(broadcast), ::typeof(exp), xs) = exp(xs)
-graph(::typeof(.+), args...) = mx.broadcast_plus(args...)
-graph(::typeof(.*), args...) = mx.broadcast_mul(args...)
-graph(::typeof(.-), args...) = mx.broadcast_sub(args...)
-
-graph(::typeof(softmax), xs) =
-  mx.broadcast_div(exp(xs), mx.sum(exp(xs), axis = 1, keepdims=true))
-
-graph(::typeof(cat), dim::Integer, a...) = mx.Concat(a..., dim = dim)
-graph(::typeof(vcat), a...) = graph(cat, 1, a...)
-
-graph(::typeof(map), f, xss::Tuple...) = map(f, xss...)
-graph(::typeof(getindex), t::Tuple, n::Integer) = t[n]
-graph(::typeof(sum), xs::Tuple) = reduce((a, b) -> graph(broadcast, +, a, b), xs)
-graph(::typeof(Base.Iterators.repeated), x, n) = ntuple(_ -> x, n)
-
-a::mx.SymbolicNode ∘ b::mx.SymbolicNode = mx.broadcast_mul(a, b)
-
-graph(::Input, x) = x
-
-struct AlterParam
-  param
-  load
-  store
-end
-
-Base.size(p::AlterParam) = size(p.load(p.param.x))
-Base.copy!(xs, p::AlterParam) = copy!(xs, p.load(p.param.x))
-
-graph(ctx::Context, d::Affine, x) =
-  !ctx[:feedforward] ? invoke(graph, Tuple{Context, Any, typeof(x)}, ctx, d, x) :
-  register(ctx,
-    mx.FullyConnected(mx.SymbolicNode, data = x,
-                      num_hidden = size(d.W.x, 2),
-                      weight = var(ctx, AlterParam(d.W, x->x', nothing)),
-                      bias = var(ctx, AlterParam(d.b, x->squeeze(x, 1), nothing))))
-
-# TODO: use actual params
-graph(ctx::Context, c::Conv2D, x) =
-  mx.Convolution(x,
-                 kernel = size(c.filter, 1, 2),
-                 num_filter = size(c.filter, 4),
-                 stride = c.stride)
-
-graph(ctx::Context, p::MaxPool, x) =
-  mx.Pooling(x,
-             pool_type = :max,
-             kernel = p.size,
-             stride = p.stride)
-
-function register(ctx::Context, node::mx.SymbolicNode)
-  ctx[:stacks][nodename(node)] = stack(ctx)
-  return node
-end
-
-register(ctx::Context, node) = node
-
-function var(ctx::Context, p::Union{Flux.Param{<:AbstractArray},AbstractArray,AlterParam})
-  haskey(ctx[:params], p) && return ctx[:params][p]
-  ctx[:params][p] = mx.Variable(gensym())
-end
-
-var(ctx::Context, x) = x
-
-function graph(ctx::Context, model, args...)
-  args = var.(ctx, args)
-  g = Flux.graph(model)
-  g == nothing && return register(ctx, @icatch ctx graph(model, args...))
-  DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.")
-  interpret(ctx, g, args...)
-end
-
-graph′(ctx::Context, args...) = @icatch ctx graph(ctx, args...)
-
-function tograph(model, args...; feedforward = false)
-  ctx = Context(mux(iline, iconst, ilambda, iargs, ituple, graph′),
-                params = ObjectIdDict(), stacks = Dict(),
-                feedforward = feedforward)
-  out = @ithrow graph(ctx, model, mapt(mx.Variable, args)...)
-  params = Dict(nodename(v) => p for (p, v) in ctx[:params])
-  return Graph(args, out, params, ctx[:stacks])
-end
-
-# Error Handling
-
-using Juno
-using MacroTools: @q
-Juno.errmsg(e::mx.MXError) = e.msg
-
-function errnode(e::mx.MXError)
-  m = match(r"Error in operator (\w+)", e.msg)
-  m == nothing && return
-  Symbol(m.captures[1])
-end
-
-striptrace(e::mx.MXError) = mx.MXError(split(e.msg, "\n")[1])
-
-macro mxerr(stk, ex)
-  @q try
-    $(esc(ex))
-  catch e
-    (e isa mx.MXError && (node = errnode(e)) != nothing) || rethrow()
-    stk = $(esc(stk))
-    haskey(stk, node) || rethrow()
-    throw(Exception(striptrace(e), totrace(stk[node])))
-  end
-end
diff --git a/src/backend/mxnet/model.jl b/src/backend/mxnet/model.jl
deleted file mode 100644
index 3ea9ea12..00000000
--- a/src/backend/mxnet/model.jl
+++ /dev/null
@@ -1,159 +0,0 @@
-using Flux: collectt, shapecheckt, back!, update!
-
-function copyargs!(as, bs)
-  for id in intersect(keys(as), keys(bs))
-    copy!(as[id], bs[id])
-  end
-end
-
-struct Graph
-  input
-  output
-  params::Dict{Symbol,Any}
-  stacks::Dict{Any,Any}
-end
-
-function mxparams(ps, ctx)
-  params = Dict{Symbol,MXArray}()
-  for (name, param) in ps
-    params[name] = MXArray(size(param), ctx)
-  end
-  return params
-end
-
-ndparams(d) = Dict{Symbol,mx.NDArray}(k => v.data for (k, v) in d)
-
-struct Exec
-  graph::Graph
-  ctx::mx.Context
-  exec::mx.Executor
-  args::Dict{Symbol,MXArray}
-  grads::Dict{Symbol,MXArray}
-  outs::Vector{MXArray}
-end
-
-loadparams!(exec::Exec) = copyargs!(exec.args, exec.graph.params)
-storeparams!(exec::Exec) = copyargs!(exec.graph.params, exec.args)
-
-mxgroup(x) = x
-mxgroup(x::Tuple) = mx.Group(mxgroup.(x)...)
-mxungroup(x, outs) = copy(shift!(outs))
-mxungroup(x::Tuple, outs) = map(x -> mxungroup(x, outs), x)
-
-dictt(xs, ys) = Dict(zip(collectt(xs), collectt(ys)))
-
-function executor(graph::Graph, input...; ctx = mx.cpu())
-  shapecheckt(graph.input, input)
-  args = merge(mxparams(graph.params, ctx), dictt(graph.input, mapt(d->MXArray(size(d), ctx), input)))
-  grads = filter((a, b) -> b isa Flux.Param, graph.params)
-  grads = merge(mxparams(grads, ctx), dictt(graph.input, mapt(d->MXArray(size(d), ctx), input)))
-  exec = mx.bind(mxgroup(graph.output),
-                 context = ctx,
-                 args = ndparams(args),
-                 args_grad = ndparams(grads),
-                 grad_req = mx.GRAD_ADD)
-  exec = Exec(graph, ctx, exec, args, grads, MXArray.(exec.outputs))
-  loadparams!(exec)
-  return exec
-end
-
-function (exec::Exec)(input...)
-  foreach(kv -> copy!(exec.args[kv[1]], kv[2]), dictt(exec.graph.input, input))
-  mx.forward(exec.exec, is_train = true)
-  mxungroup(exec.graph.output, copy(exec.outs))
-end
-
-function Flux.back!(exec::Exec, Δ)
-  mapt(k -> exec.grads[k][:] = 0, exec.graph.input)
-  mx.backward(exec.exec, map(x -> MXArray(x, exec.ctx).data, collectt(Δ)))
-  mapt(k -> copy(exec.grads[k]), exec.graph.input)
-end
-
-function Flux.update!(exec::Exec, η)
-  for (arg, grad) in zip(exec.exec.arg_arrays, exec.exec.grad_arrays)
-    grad == nothing && continue
-    mx.@nd_as_jl rw = (arg, grad) begin
-      arg .-= grad .* η
-      grad[:] = 0
-    end
-  end
-  storeparams!(exec)
-  return exec
-end
-
-toctx(ctx::mx.Context) = ctx
-toctx(c::Symbol) = c == :gpu ? mx.gpu() : mx.cpu()
-
-# TODO: if `last` changes, update params appropriately
-
-mutable struct Model
-  model::Any
-  ctx::mx.Context
-  execs::Dict{Tuple,Exec}
-  graph::Graph
-  last::Exec
-  Model(model, ctx) = new(model, ctx, Dict())
-end
-
-mxnet(model, ctx = :cpu) = Model(model, toctx(ctx))
-
-function Base.show(io::IO, m::Model)
-  print(io, "MX.Model(")
-  show(io, m.model)
-  print(io, ", ")
-  show(io, m.ctx)
-  print(io, ")")
-end
-
-import Base: @get!
-
-# TODO: dims having its own type would be useful
-executor(m::Model, input...) =
-  @get!(m.execs, mapt(size, input),
-        executor(m.graph, input...; ctx = m.ctx))
-
-function (m::Model)(xs...)
-  @mxerr m.graph.stacks begin
-    !isdefined(m, :graph) &&
-      (m.graph = tograph(m.model, mapt(_ -> gensym("input"), xs)...))
-    m.last = exec = executor(m, xs...)
-    exec(xs...)
-  end
-end
-
-function Flux.back!(m::Model, Δ, xs...)
-  m.last = exec = m.execs[mapt(size, xs)]
-  back!(exec, Δ)
-end
-
-Flux.update!(m::Model, η) = (update!(m.last, η); m)
-
-# Recurrent Models
-
-using Flux: Stateful, SeqModel
-
-mxnet(m::Stateful, a...) = Stateful(mxnet(m.model, a...), m.states, m.istate, m.ostate)
-mxnet(m::SeqModel, a...) = SeqModel(mxnet(m.model, a...), m.steps)
-
-# MX FeedForward interface
-
-struct SoftmaxOutput
-  name::Symbol
-end
-
-graph(s::SoftmaxOutput, xs) = mx.SoftmaxOutput(xs, name = s.name)
-
-function rewrite_softmax(model, name)
-  model == softmax && return SoftmaxOutput(name)
-  g = Flux.graph(model)
-  (g == nothing || g.value ≠ softmax || DataFlow.nin(g) ≠ 1) && error("mx.FeedForward models must end with `softmax`")
-  return Flux.Capacitor(vertex(SoftmaxOutput(name), g[1]))
-end
-
-function FeedForward(model; input = :data, label = :softmax, ctx = mx.cpu())
-  model = rewrite_softmax(model, label)
-  graph = tograph(model, input, feedforward=true)
-  ff = mx.FeedForward(graph.output, context = ctx)
-  isempty(graph.params) || (ff.arg_params = ndparams(mxparams(graph.params, ctx)))
-  return ff
-end
diff --git a/src/backend/mxnet/mxarray.jl b/src/backend/mxnet/mxarray.jl
deleted file mode 100644
index ecfe7439..00000000
--- a/src/backend/mxnet/mxarray.jl
+++ /dev/null
@@ -1,40 +0,0 @@
-using MXNet
-
-# NDArray is row-major so by default all dimensions are reversed in MXNet.
-# MXArray tranposes when loading/storing to fix this.
-
-reversedims!(dest, xs) = permutedims!(dest, xs, ndims(xs):-1:1)
-
-struct MXArray{N}
-  data::mx.NDArray
-  scratch::Array{Float32,N}
-end
-
-MXArray(data::mx.NDArray) = MXArray(data, Array{Float32}(size(data)))
-
-# TODO: split cpu/gpu mxarrays
-MXArray(dims::Dims, ctx = mx.cpu()) = MXArray(mx.zeros(reverse(dims), ctx))
-
-Base.size(xs::MXArray) = reverse(size(xs.data))
-
-function Base.copy!(mx::MXArray, xs::AbstractArray)
-  @assert size(mx) == size(xs)
-  reversedims!(mx.scratch, xs)
-  copy!(mx.data, mx.scratch)
-  return mx
-end
-
-function Base.copy!(xs::AbstractArray, mx::MXArray)
-  @assert size(xs) == size(mx)
-  copy!(mx.scratch, mx.data)
-  reversedims!(xs, mx.scratch)
-end
-
-Base.copy(mx::MXArray) = copy!(Array{Float32}(size(mx)), mx)
-
-function MXArray(xs::AbstractArray, ctx = mx.cpu())
-  mx = MXArray(size(xs), ctx)
-  copy!(mx, xs)
-end
-
-Base.setindex!(xs::MXArray, x::Real, ::Colon) = xs.data[:] = x
diff --git a/src/backend/mxnet/mxnet.jl b/src/backend/mxnet/mxnet.jl
deleted file mode 100644
index fb9db1dd..00000000
--- a/src/backend/mxnet/mxnet.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-module MX
-
-using MXNet, DataFlow, ..Flux
-
-export mxnet
-
-include("mxarray.jl")
-include("graph.jl")
-include("model.jl")
-
-end
diff --git a/src/backend/tensorflow/graph.jl b/src/backend/tensorflow/graph.jl
deleted file mode 100644
index cf01de74..00000000
--- a/src/backend/tensorflow/graph.jl
+++ /dev/null
@@ -1,133 +0,0 @@
-using Base: @get!
-using Flux: Reshape, MaxPool, flatten
-using DataFlow: constant, Split
-using DataFlow.Interpreter
-using DataFlow.Interpreter: stack
-using TensorFlow: RawTensor, TFException
-
-# TODO: implement Julia's type promotion rules
-
-node(x::Tuple) = map(node, x)
-node(x::Tensor) = x
-node(x::Variable) = x
-node(x::Number) = TensorFlow.constant(Float32(x))
-
-graph(::typeof(tuple), args...) = (args...,)
-graph(s::Split, t::Tuple) = t[s.n]
-graph(::typeof(getindex), t::Tuple, n::Integer) = t[n]
-graph(::typeof(identity), x) = TensorFlow.identity(x)
-graph(::typeof(softmax), x) = nn.softmax(x)
-graph(::typeof(relu), x) = nn.relu(x)
-graph(::typeof(σ), x) = nn.sigmoid(x)
-graph(::typeof(hcat), xs...) = concat(1, xs)
-graph(::typeof(sum), x, dim=nothing) = TensorFlow.reduce_sum(x;axis=dim)
-graph(::typeof(prod), x, dim=nothing) = TensorFlow.reduce_prod(x;axis=dim)
-graph(::typeof(min), x, dim=nothing) = TensorFlow.reduce_min(x;axis=dim)
-graph(::typeof(max), x, dim=nothing) = TensorFlow.reduce_max(x;axis=dim)
-graph(::typeof(all), x, dim=nothing) = TensorFlow.reduce_all(x;axis=dim)
-graph(::typeof(any), x, dim=nothing) = TensorFlow.reduce_any(x;axis=dim)
-graph(::typeof(mean), x, dim=nothing) = TensorFlow.reduce_mean(x;axis=dim)
-graph(::typeof(svd), x) = svd(x)
-graph(::typeof(size), x, dim) = TensorFlow.size(x,convert(Tensor{Int32}, dim))
-graph(::typeof(size), x) = TensorFlow.size(x)
-graph(::typeof(chol), args...) = TensorFlow.transpose(TensorFlow.cholesky(args...))
-graph(::typeof(reshape), x, dims) = TensorFlow.reshape(x,convert(Tensor{Int32},dims))
-graph(::typeof(Flux.tile), args...) = TensorFlow.tile(args...)
-graph(::typeof(fill), x, dims) = Ops.fill(convert(Tensor{Int32}, dims), Tensor(x))
-graph(::typeof(Flux.cast), args...) = TensorFlow.cast(args...)
-graph(::typeof(solve), A, b) = TensorFlow.matrix_solve(A, b)
-graph(::typeof(triangular_solve), A, b) = TensorFlow.matrix_triangular_solve(A, b; lower=false)
-graph(::typeof(randu), x) = Ops.random_uniform(convert(Tensor{Int32},x);dtype=Float32)
-graph(::typeof(randn), x) = TensorFlow.random_normal(convert(Tensor{Int32},x);dtype=Float32)
-graph(::typeof(Flux.expand_dims), x, dim) = TensorFlow.expand_dims(x,convert(Tensor{Int32},dim))
-
-for op in (*, .*, .+, .^, log, exp, ceil, floor, sqrt, abs, cos,
-           sin, tan, atan, asin, acos, tanh, lgamma, erf, erfc, real, imag, conj,
-           inv, det, transpose, permutedims, cat, length, diag, diagm)
-  @eval graph(::typeof($op), args...) = $op(args...)
-end
-
-for op in (+, -, *, /)
-  @eval graph(::typeof(broadcast), ::typeof($op), args...) = broadcast($op, args...)
-end
-
-graph(::typeof(.-), args...) = -(args...)
-
-graph(::typeof(map), f, xss::Tuple...) = map(f, xss...)
-
-# reshape hack due to https://github.com/malmaud/TensorFlow.jl/issues/79
-batchsize(x::Tensor) = reduce_sum(slice(TensorFlow.shape(x), [0], [1]))
-graph(::typeof(flatten), x) = reshape(x, pack([batchsize(x), Int32(-1)]))
-graph(r::Reshape, x) = reshape(x, pack([batchsize(x), map(Int32, r.dims)...]))
-
-graph(::Input, x) = x
-
-graph(p::MaxPool, x) =
-  nn.max_pool(x, [1, p.size..., 1], [1, p.stride..., 1], "VALID")
-
-graph(op::Op, xs...) = op.f(xs...)
-
-function graph(ctx::Context, model, args...)
-  node = graph(model, args...)
-  node isa Tensor && (ctx[:stacks][node.op.name] = stack(ctx))
-  return node
-end
-
-interp(ctx, c::Conv2D, x) =
-  nn.conv2d(x, interp(ctx, constant(c.filter)), [1,c.stride...,1], "VALID")
-
-param(ctx, p::Flux.Param{<:AbstractArray}) =
-  haskey(ctx[:params], p) ?
-    ctx[:params][p] :
-    (ctx[:params][p] =
-      ctx[:variables] ?
-        Variable(Float32.(p.x)) :
-        placeholder(Float32))
-
-param(ctx, x) = x
-
-function interp(ctx, model, args...)
-  args = param.(ctx, args)
-  g = Flux.graph(model)
-  g == nothing && return graph(ctx, model, args...)
-  DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.")
-  interpret(ctx, g, args...)
-end
-
-function tograph(model, args...; variables = false)
-  ctx = Context(mux(iline, iconst, ilambda, iargs, ituple, interp),
-                params = ObjectIdDict(), stacks = Dict(), variables = variables)
-  out = interp(ctx, model, map(constant, args)...)
-  return ctx[:params], ctx[:stacks], out
-end
-
-astensor(model, args...) =
-  tograph(model, args...; variables = true)[3]
-
-RawTensor(data::Union{Flux.Batch,Flux.Seq}) = RawTensor(Flux.rawbatch(data))
-
-# Error Handling
-
-using Juno
-using MacroTools: @q
-using DataFlow.Interpreter: Exception, totrace
-Juno.errmsg(e::TFException) = string(e.status)
-
-function errnode(e::TFException)
-  m = match(r"Node: ([\w\d]+) =", string(e.status))
-  m == nothing && return
-  m.captures[1]
-end
-
-errnode(e) = nothing
-
-macro tferr(stk, ex)
-  @q try
-    $(esc(ex))
-  catch e
-    (node = errnode(e)) != nothing || rethrow()
-    stk = $(esc(stk))
-    haskey(stk, node) || rethrow()
-    throw(Exception(e, totrace(stk[node])))
-  end
-end
diff --git a/src/backend/tensorflow/model.jl b/src/backend/tensorflow/model.jl
deleted file mode 100644
index 512fbc15..00000000
--- a/src/backend/tensorflow/model.jl
+++ /dev/null
@@ -1,86 +0,0 @@
-using Flux: Param, mapt, collectt, shapecheckt
-
-struct Exec
-  session ::Session
-  input   ::Any
-  output  ::Any
-  params  ::Dict{Param,Param{Tensor}}
-  stacks  ::Dict{Any,Any}
-end
-
-dummy(x::Void) = TensorFlow.constant(0)
-dummy(x::Tensor) = x
-
-function makesession(model, inputs; session = Session(Graph()))
-  inputs = mapt(_ -> placeholder(Float32), inputs)
-  params, stacks, output = tograph(model, inputs...)
-  output = mapt(x->Param{Tensor}(x, placeholder(Float32)), output)
-  params = Dict(x=>Param{Tensor}(y, dummy(gradients(map(x->x.x, collectt(output)),
-                                                    y, map(x->x.Δx, collectt(output)))))
-                for (x, y) in params)
-  inputs = mapt(x->Param{Tensor}(x, dummy(gradients(map(x->x.x, collectt(output)),
-                                                    x, map(x->x.Δx, collectt(output))))),
-                inputs)
-  run(session, global_variables_initializer())
-  Exec(session, inputs, output, params, stacks)
-end
-
-retuple(xs) = xs
-retuple(xs::AbstractArray{<:AbstractArray}) = (retuple.(xs)...,)
-
-dictt(xs, ys) = Dict(zip(collectt(xs), collectt(ys)))
-
-function (m::Exec)(args...)
-  dict = merge(
-    Dict(y.x=>x.x for (x, y) in m.params),
-    Dict(x.x=>y for (x, y) in dictt(m.input, args))
-  )
-  retuple(run(m.session, mapt(x->x.x, m.output), dict))
-end
-
-function Flux.back!(m::Exec, Δ, args...)
-  dict = merge(
-    Dict(y.x=>x.x for (x, y) in m.params),
-    Dict(x.x=>y for (x, y) in zip(m.input, args)),
-    Dict(x.Δx=>y for (x, y) in zip(collectt(m.output), collectt(Δ)))
-  )
-
-  Δin, Δps = run(m.session, (mapt(x->x.Δx, m.input), map(x->x.Δx, values(m.params))), dict)
-
-  for (p, Δ) in zip(keys(m.params), Δps)
-    p.Δx .+= Δ
-  end
-
-  Δin
-end
-
-function Flux.update!(m::Exec, η)
-  for p in keys(m.params)
-    Flux.update!(p, η)
-  end
-  return m
-end
-
-mutable struct Model
-  model::Any
-  exec::Exec
-  Model(model) = new(model)
-end
-
-tf(model) = Model(model)
-
-function (m::Model)(args...)
-  args = mapt(x->Float32.(x), args)
-  isdefined(m, :exec) || (m.exec = makesession(m.model, args))
-  @tferr m.exec.stacks m.exec(args...)
-end
-
-Flux.back!(m::Model, Δ, args...) = Flux.back!(m.exec, Δ, args...)
-Flux.update!(m::Model, η) = (Flux.update!(m.exec, η); m)
-
-# Recurrent Models
-
-using Flux: Stateful, SeqModel
-
-tf(m::Stateful) = Stateful(tf(m.model), m.states, m.istate, m.ostate)
-tf(m::SeqModel) = SeqModel(tf(m.model), m.steps)
diff --git a/src/backend/tensorflow/tensorflow.jl b/src/backend/tensorflow/tensorflow.jl
deleted file mode 100644
index 74c94012..00000000
--- a/src/backend/tensorflow/tensorflow.jl
+++ /dev/null
@@ -1,20 +0,0 @@
-module TF
-
-using ..Flux, DataFlow, TensorFlow, Juno
-import Flux: accuracy, convertel
-
-export tf
-
-struct Op
-  f
-  shape
-end
-
-Op(f) = Op(f, (d...) -> nothing)
-
-Flux.shape(op::Op, d...) = op.shape(d...)
-
-include("graph.jl")
-include("model.jl")
-
-end
diff --git a/test/backend/mxnet.jl b/test/backend/mxnet.jl
deleted file mode 100644
index cad5407e..00000000
--- a/test/backend/mxnet.jl
+++ /dev/null
@@ -1,39 +0,0 @@
-using MXNet
-Flux.loadmx()
-
-@testset "MXNet" begin
-
-xs, ys = rand(1, 20), rand(1, 20)
-d = Affine(20, 10)
-
-dm = mxnet(d)
-@test d(xs) ≈ dm(xs)
-
-test_tupleio(mxnet)
-test_recurrence(mxnet)
-test_stacktrace(mxnet)
-test_back(mxnet)
-test_anon(mxnet)
-
-using Flux: MaxPool
-
-@testset "Native interface" begin
-  f = Flux.MX.FeedForward(Chain(d, softmax))
-  @test mx.infer_shape(f.arch, data = (20, 1))[2] == [(10, 1)]
-
-  m = Chain(Input(28,28), Conv2D((5,5), out = 3), MaxPool((2,2)),
-            flatten, Affine(1587, 10), softmax)
-  f = Flux.MX.FeedForward(m)
-  # TODO: test run
-  @test mx.infer_shape(f.arch, data = (20, 20, 5, 1))[2] == [(10, 1)]
-end
-
-@testset "Duplicate parameters" begin
-  a = Affine(10, 10)
-  d = Chain(a, a)
-  m = mxnet(d)
-  m(randn(1, 10))
-  @test length(m.graph.params) == 2
-end
-
-end
diff --git a/test/backend/tensorflow.jl b/test/backend/tensorflow.jl
deleted file mode 100644
index 72ccd47f..00000000
--- a/test/backend/tensorflow.jl
+++ /dev/null
@@ -1,70 +0,0 @@
-using TensorFlow
-Flux.loadtf()
-
-@testset "TensorFlow" begin
-
-xs, ys = rand(1, 20), rand(1, 20)
-d = Affine(20, 10)
-
-dt = tf(d)
-@test d(xs) ≈ dt(xs)
-
-test_tupleio(tf)
-test_recurrence(tf)
-test_stacktrace(tf)
-test_anon(tf)
-
-@testset "Tensor interface" begin
-  sess = TensorFlow.Session()
-  X = placeholder(Float32)
-  Y = Flux.TF.astensor(d, X)
-  run(sess, global_variables_initializer())
-
-  @test run(sess, Y, Dict(X=>xs)) ≈ d(xs)
-end
-
-@testset "Ops" begin
-  A = randn(Float32,(5,5))
-  # u,s,v = tf(@net x -> svd(x))(A)
-  # @test A ≈ u*diagm(s)*transpose(v)
-  @test tf(@net x -> inv(x))(A) ≈ inv(A)
-  @test tf(@net x -> det(x))(A) ≈ det(A)
-  A = randn(Float32,(6,3))
-  @test tf(@net x -> transpose(x))(A) ≈ transpose(A)
-  A = randn(Float32,(6,3,2))
-  @test tf(@net (x,y) -> permutedims(x,y))(A,[3,2,1]) ≈ permutedims(A,[3,2,1])
-  A1 = randn(Float32,(4,1))
-  A2 = randn(Float32,(4,1))
-  @test tf(@net (x,y) -> cat(2,x,y))(A1,A2) ≈ cat(2,A1,A2)
-  @test tf(@net x -> length(x))(A1) == length(A1)
-  A = randn(Float32,(5,5))
-  @test tf(@net x -> diag(x))(A) ≈ diag(A)
-  A = randn(Float32,(5,))
-  @test tf(@net x -> diagm(x))(A) ≈ diagm(A)
-  A = randn(4,5)
-  @test tf(@net x -> size(x))(A) == [4,5]
-  @test tf(@net (x,y) -> size(x,y))(A,1) == 4
-  A = randn(6,5)
-  A = A'*A
-  @test tf(@net x -> chol(x))(A) ≈ chol(A)
-  A = randn(Float32,(6,3))
-  @test transpose(tf(@net (x,y) -> reshape(x,y))(transpose(A),[2,9])) ≈ reshape(A,(9,2)) # Note: TF is row major and julia is not
-  A = randn(Float32,(4,3,1))
-  @test tf(@net (x,y) -> Flux.tile(x,y))(A,[1,1,3]) ≈ repeat(A,outer=(1,1,3))
-  @test tf(@net (x,y) -> fill(x,y))(3.2,[3,2]) ≈ convert(Array{Float32},3.2*ones(3,2))
-  @test typeof(tf(@net x -> Flux.cast(x,Int32))(A)) == Array{Int32,3}
-  A = randn(Float32,(5,5))
-  b = randn(Float32,(5,1))
-  @test tf(@net (x,y) -> solve(x,y))(A,b) ≈ A\b
-  _,A,_ = lu(A)
-  @test tf(@net (x,y) -> triangular_solve(x,y))(A,b) ≈ A\b
-  @test size(tf(@net x -> randu(x))([2,3])) == (2,3)
-  @test size(tf(@net x -> randn(x))([2,3])) == (2,3)
-  m = tf(@net (x,y) -> Flux.expand_dims(x,y))
-  A = randn(Float32,(3,2))
-  @test m(A,1) ≈ Flux.expand_dims(A,1)
-  @test m(A,2) ≈ Flux.expand_dims(A,2)
-  @test m(A,3) ≈ Flux.expand_dims(A,3)
-end
-
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7128e13b..08e1ea9a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,14 +2,6 @@ using Flux, DataFlow, MacroTools, Base.Test
 using Flux: graph, Param, squeeze, unsqueeze, back!, update!, flatten
 using DataFlow: Line, Frame
 
-macro mxonly(ex)
-  :(Base.find_in_path("MXNet") ≠ nothing && $(esc(ex)))
-end
-
-macro tfonly(ex)
-  :(Base.find_in_path("TensorFlow") ≠ nothing && $(esc(ex)))
-end
-
 @testset "Flux" begin
 
 include("batching.jl")
@@ -20,7 +12,4 @@ include("recurrent.jl")
 include("optimizer.jl")
 include("throttle.jl")
 
-@tfonly include("backend/tensorflow.jl")
-@mxonly include("backend/mxnet.jl")
-
 end