diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6b2ea0ab..00000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.jl.cov -*.jl.*.cov -*.jl.mem -docs/build/ -docs/site/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2a9a22ce..00000000 --- a/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Documentation: http://docs.travis-ci.com/user/languages/julia/ -language: julia -os: - - linux - - osx -julia: - - 0.5 -notifications: - email: false -# uncomment the following lines to override the default test script -script: - - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone("https://github.com/MikeInnes/DataFlow.jl")' - - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)' -# after_success: - - julia -e 'Pkg.add("Documenter")' - - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))' diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 60fd1522..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,22 +0,0 @@ -The Flux.jl package is licensed under the MIT "Expat" License: - -> Copyright (c) 2016: Mike Innes. -> -> Permission is hereby granted, free of charge, to any person obtaining -> a copy of this software and associated documentation files (the -> "Software"), to deal in the Software without restriction, including -> without limitation the rights to use, copy, modify, merge, publish, -> distribute, sublicense, and/or sell copies of the Software, and to -> permit persons to whom the Software is furnished to do so, subject to -> the following conditions: -> -> The above copyright notice and this permission notice shall be -> included in all copies or substantial portions of the Software. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md deleted file mode 100644 index 9d4f0bcf..00000000 --- a/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Флукс - -[![Build Status](https://travis-ci.org/MikeInnes/Flux.jl.svg?branch=master)](https://travis-ci.org/MikeInnes/Flux.jl) - -Flux is a high-level API for machine learning, implemented in Julia. - -Flux aims to provide a concise and expressive syntax for architectures that are hard to express within other frameworks. The notation should be familiar and extremely close to what you'd find in a paper or description of the model. - -The current focus is on ANNs with TensorFlow or MXNet as a backend. While it's in a very early working-prototype stage, you can see what works so far in the [examples folder](/examples). 
- -## Brief Examples - -Simple multi-layer-perceptron for MNIST: - -```julia -Chain( - Input(784), - Affine(128), relu, - Affine( 64), relu, - Affine( 10), softmax) -``` - -LSTM example: - -```julia -@net type LSTM - Wxf; Wyf; bf - Wxi; Wyi; bi - Wxo; Wyo; bo - Wxc; Wyc; bc - y; state - function (x) - # Gates - forget = σ( x * Wxf + y{-1} * Wyf + bf ) - input = σ( x * Wxi + y{-1} * Wyi + bi ) - output = σ( x * Wxo + y{-1} * Wyo + bo ) - # State update and output - state′ = tanh( x * Wxc + y{-1} * Wyc + bc ) - state = forget .* state{-1} + input .* state′ - y = output .* tanh(state) - end -end - -Chain( - Input(N), - LSTM(N, 256), - LSTM(256, 256), - Affine(256, N), - softmax) -``` diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index d0ddfa05..00000000 --- a/REQUIRE +++ /dev/null @@ -1,3 +0,0 @@ -julia 0.5- -TensorFlow -Iterators diff --git a/docs/make.jl b/docs/make.jl deleted file mode 100644 index 9ef67abd..00000000 --- a/docs/make.jl +++ /dev/null @@ -1,17 +0,0 @@ -using Documenter, Flux - -makedocs(modules=Module[Flux], - doctest=false, clean=true, - format = :html, - sitename="Flux Documentation", - pages = [ - "Home" => "index.md", - ]) - -deploydocs( - repo = "github.com/MikeInnes/Flux.jl.git", - target = "build", - osname = "linux", - julia = "0.5", - deps = nothing, - make = nothing) diff --git a/docs/src/index.md b/docs/src/index.md deleted file mode 100644 index 23a30b30..00000000 --- a/docs/src/index.md +++ /dev/null @@ -1 +0,0 @@ -# Flux diff --git a/examples/MNIST.jl b/examples/MNIST.jl deleted file mode 100644 index 70d26ee1..00000000 --- a/examples/MNIST.jl +++ /dev/null @@ -1,22 +0,0 @@ -using Flux, MNIST - -data = [(Vector{Float32}(trainfeatures(i)), onehot(Float32, trainlabel(i), 0:9)) for i = 1:60_000] -train = data[1:50_000] -test = data[50_001:60_000] - -m = Chain( - Input(784), - Affine(128), relu, - Affine( 64), relu, - Affine( 10), softmax) - -# Convert to TensorFlow -model = tf(m) - -# An example prediction pre-training -model(data[1][1]) - -@time Flux.train!(model, train, test, η = 1e-3) - -# An example prediction post-training -model(data[1][1]) diff --git a/examples/batching.jl b/examples/batching.jl deleted file mode 100644 index ccbac02f..00000000 --- a/examples/batching.jl +++ /dev/null @@ -1,26 +0,0 @@ -using Flux - -# Traditional Approach - -# 100 samples of sequences of 15 28×28 3-colour images -rand(100, 15, 28, 28, 3) - -# Basic Batching - -data = Batch([collect(reshape(9(i-1):9i-1, 3, 3)) for i = 1:10]) - -Batch(flatten.(data)) - -data |> structure - -Batch(flatten.(data)) |> structure - -# Nested Batching - -# DNA seqence, encoded as a list of [A, T, G, C] -x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]]) -x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]]) - -data = Batch([x1, x2]) - -data |> structure diff --git a/examples/char-rnn.jl b/examples/char-rnn.jl deleted file mode 100644 index 0bff81cf..00000000 --- a/examples/char-rnn.jl +++ /dev/null @@ -1,38 +0,0 @@ -using Flux -import StatsBase: wsample - -nunroll = 50 -nbatch = 50 - -getseqs(chars, alphabet) = sequences((onehot(Float32, char, alphabet) for char in chars), nunroll) -getbatches(chars, alphabet) = batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...) 
- -input = readstring("$(homedir())/Downloads/shakespeare_input.txt") -alphabet = unique(input) -N = length(alphabet) - -Xs, Ys = getbatches(input, alphabet), getbatches(input[2:end], alphabet) - -model = Chain( - Input(N), - LSTM(N, 256), - LSTM(256, 256), - Affine(256, N), - softmax) - -m = tf(unroll(model, nunroll)) - -@time Flux.train!(m, Xs, Ys, η = 0.1, epoch = 1) - -string(map(c -> onecold(c, alphabet), m(first(first(Xs))))...) - -function sample(model, n, temp = 1) - s = [rand(alphabet)] - m = tf(unroll(model, 1)) - for i = 1:n - push!(s, wsample(alphabet, softmax(m(Seq((onehot(Float32, s[end], alphabet),)))[1]./temp))) - end - return string(s...) -end - -sample(model, 100) diff --git a/examples/integration.jl b/examples/integration.jl deleted file mode 100644 index 704446e8..00000000 --- a/examples/integration.jl +++ /dev/null @@ -1,70 +0,0 @@ -using Flux, Juno - -# Flux aims to provide high-level APIs that work well across backends, but in -# some cases you may want to take advantage of features specific to a given -# backend (or Flux may simply not have an implementation of that feature yet). -# In these cases it's easy to "drop down" and use the backend's API directly, -# where appropriate. - -# In this example, both things are happening; firstly, Flux doesn't yet support -# ConvNets in the pure-Julia backend, but this is invisible thanks to the use of -# a simple "shim" type, `Conv2D`. This is provided by the library but could easily -# have been user-defined. - -# Secondly, we want to take advantage of TensorFlow.jl's training process and -# optimisers. We can simply call `Tensor` exactly as we would on a regular -# TensorFlow model, and the rest of the process trivially follows -# TensorFlow.jl's usual API. - -conv1 = Chain( - Reshape(28,28,1), - Conv2D((5,5), out = 20), tanh, - MaxPool((2,2), stride = (2,2))) - -conv2 = Chain( - Input(12,12,20), - Conv2D((5,5), in = 20, out = 50), tanh, - MaxPool((2,2), stride = (2,2))) - -lenet = Chain( - conv1, conv2, flatten, - Affine(500), tanh, - Affine(10), softmax) - -#-------------------------------------------------------------------------------- - -# Now we can continue exactly as in plain TensorFlow, following -# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl -# (taking only the training and cost logic, not the graph building steps) - -using TensorFlow, Distributions - -include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl")) -loader = DataLoader() - -session = Session(Graph()) - -x = placeholder(Float32) -y′ = placeholder(Float32) -y = Tensor(lenet, x) - -cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2])) - -train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy) - -accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32)) - -run(session, initialize_all_variables()) - -@progress for i in 1:1000 - batch = next_batch(loader, 50) - if i%100 == 1 - train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2])) - info("step $i, training accuracy $train_accuracy") - end - run(session, train_step, Dict(x=>batch[1], y′=>batch[2])) -end - -testx, testy = load_test_set() -test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy)) -info("test accuracy $test_accuracy") diff --git a/examples/translation.jl b/examples/translation.jl deleted file mode 100644 index df2559cb..00000000 --- a/examples/translation.jl +++ /dev/null @@ -1,52 +0,0 @@ -# Based on https://arxiv.org/abs/1409.0473 - -using Flux -using Flux: flip - -# A recurrent model 
which takes a token and returns a context-dependent -# annotation. - -@net type Encoder - forward - backward - token -> hcat(forward(token), backward(token)) -end - -Encoder(in::Integer, out::Integer) = - Encoder(LSTM(in, out÷2), flip(LSTM(in, out÷2))) - -# A recurrent model which takes a sequence of annotations, attends, and returns -# a predicted output token. - -@net type Decoder - attend - recur - state; y; N - function (anns) - energies = map(ann -> exp(attend(hcat(state{-1}, ann))[1]), seq(anns, N)) - weights = energies./sum(energies) - ctx = sum(map((α, ann) -> α .* ann, weights, anns)) - (_, state), y = recur((state{-1},y{-1}), ctx) - y - end -end - -Decoder(in::Integer, out::Integer; N = 1) = - Decoder(Affine(in+out, 1), - unroll1(LSTM(in, out)), - param(zeros(1, out)), param(zeros(1, out)), N) - -# The model - -Nalpha = 5 # The size of the input token vector -Nphrase = 7 # The length of (padded) phrases -Nhidden = 12 # The size of the hidden state - -encode = Encoder(Nalpha, Nhidden) -decode = Chain(Decoder(Nhidden, Nhidden, N = Nphrase), Affine(Nhidden, Nalpha), softmax) - -model = Chain( - unroll(encode, Nphrase, stateful = false), - unroll(decode, Nphrase, stateful = false, seq = false)) - -xs = Batch([Seq(rand(Float32, Nalpha) for _ = 1:Nphrase)]) diff --git a/src/Flux.jl b/src/Flux.jl deleted file mode 100644 index 1df87404..00000000 --- a/src/Flux.jl +++ /dev/null @@ -1,36 +0,0 @@ -module Flux - -using MacroTools, Lazy, DataFlow, Juno -using DataFlow: graphm, syntax, prewalk!, postwalk!, prewalk, postwalk, - iscyclic, Constant, constant, isconstant, group, Split, splitnode, - detuple, value, inputs, thread!, value, inputs, Split, splitnode, inputnode, - spliceinputs, bumpinputs, Frame -using Juno: Tree, Row - -# Zero Flux Given - -include("model.jl") -include("utils.jl") -include("data.jl") - -include("compiler/code.jl") -include("compiler/loops.jl") -include("compiler/interp.jl") -include("compiler/shape.jl") - -include("layers/affine.jl") -include("layers/activation.jl") -include("layers/recurrent.jl") -include("layers/shape.jl") -include("layers/chain.jl") -include("layers/shims.jl") - -include("dims/catmat.jl") -include("dims/batching.jl") -include("dims/seq.jl") - -include("cost.jl") - -include("backend/backend.jl") - -end # module diff --git a/src/backend/backend.jl b/src/backend/backend.jl deleted file mode 100644 index 372148f9..00000000 --- a/src/backend/backend.jl +++ /dev/null @@ -1,11 +0,0 @@ -export tf - -function loadtf() - isdefined(Flux, :TF) && return - @eval include(joinpath(dirname($@__FILE__), "tensorflow/tensorflow.jl")) -end - -function tf(args...) - loadtf() - TF.tf(args...) -end diff --git a/src/backend/tensorflow/graph.jl b/src/backend/tensorflow/graph.jl deleted file mode 100644 index bd618223..00000000 --- a/src/backend/tensorflow/graph.jl +++ /dev/null @@ -1,72 +0,0 @@ -using Base: @get! -using DataFlow: Constant, constant, Context, interpret, Split, - interpv, ituple, ilambda, iconst, iline, stack, mux -using Flux: imap -using TensorFlow: RawTensor - -# TODO: implement Julia's type promotion rules - -node(x::Tuple) = map(node, x) -node(x::Tensor) = x -node(x::Variable) = x -node(x::Number) = TensorFlow.constant(Float32(x)) - -graph(::typeof(tuple), args...) = (args...,) -graph(s::Split, t::Tuple) = t[s.n] -graph(::typeof(softmax), x) = nn.softmax(x) -graph(::typeof(relu), x) = nn.relu(x) -graph(::typeof(σ), x) = nn.sigmoid(x) -graph(::typeof(hcat), xs...) 
= concat(1, xs) -graph(::typeof(seq), xs, n) = TensorFlow.unpack(xs, num = n, axis = 1) - -for op in (tanh, *, .*, +, -) - @eval graph(::typeof($op), args...) = $op(node(args)...) -end - -graph(::typeof(.-), args...) = -(node(args)...) - -# reshape hack due to https://github.com/malmaud/TensorFlow.jl/issues/79 -batchsize(x::Tensor) = reduce_sum(slice(TensorFlow.shape(x), [0], [1])) -graph(::typeof(flatten), x) = reshape(x, pack([batchsize(x), Int32(-1)])) -graph(r::Reshape, x) = reshape(x, pack([batchsize(x), map(Int32, r.dims)...])) - -graph(::Input, x) = x - -graph(p::MaxPool, x) = - nn.max_pool(x, [1, p.size..., 1], [1, p.stride..., 1], "VALID") - -graph(op::Op, xs...) = op.f(xs...) - -function graph(ctx::Context, model, args...) - node = graph(model, interpv(ctx, args)...) - isa(node, Tensor) && (ctx[:stacks][node.op.name] = stack(ctx)) - return node -end - -interp(ctx, c::Conv2D, x) = - nn.conv2d(interpv(ctx, x), interp(ctx, Constant(c.filter)), [1,c.stride...,1], "VALID") - -interp{T<:AArray}(ctx, p::Constant{Flux.Param{T}}) = - haskey(ctx[:params], p.value) ? - ctx[:params][p.value] : - (ctx[:params][p.value] = Variable(p.value.x)) - -interp(ctx, p::Constant) = p.value - -function interp(ctx, model, args...) - g = Flux.graph(model) - g == nothing && return graph(ctx, model, args...) - DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.") - interpret(ctx, g, interpv(ctx, args)...) -end - -function tograph(model, args...) - ctx = Context(mux(iline, ilambda, ituple, imap, interp), - params = ObjectIdDict(), stacks = Dict()) - out = interp(ctx, model, map(constant, args)...) - return ctx[:params], ctx[:stacks], out -end - -TensorFlow.Tensor(m::Flux.Model, args...) = tograph(m, args...)[2] - -RawTensor(data::Union{Batch,Seq}) = RawTensor(rawbatch(data)) diff --git a/src/backend/tensorflow/model.jl b/src/backend/tensorflow/model.jl deleted file mode 100644 index ef6d2040..00000000 --- a/src/backend/tensorflow/model.jl +++ /dev/null @@ -1,98 +0,0 @@ -type Model - model::Any - session::Session - params::Dict{Flux.Param,Tensor} - stacks::Dict - inputs::Vector{Tensor} - output::Any -end - -function makesession(model, inputs; session = Session(Graph())) - params, stacks, output = tograph(model, inputs...) - run(session, initialize_all_variables()) - Model(model, session, params, stacks, inputs, output) -end - -function makesession(model, n::Integer; session = Session(Graph())) - makesession(model, [placeholder(Float32) for _ = 1:n], session = session) -end - -tf(model) = makesession(model, 1) - -function storeparams!(sess, params) - for (p, t) in params - p.x = run(sess, t) - end -end - -storeparams!(m::Model) = storeparams!(m.session, m.params) - -ismultioutput(m::Model) = !isa(m.output, Tensor) - -function batch(xs) - dims = ndims(xs)-1 - T = Array{eltype(xs),dims} - B = Array{eltype(xs),dims+1} - Batch{T,B}(xs) -end - -function tferr(model::Model, e) - m = match(r"Node: ([\w\d]+) =", string(e.status)) - m == nothing && return - node = m.captures[1] - if haskey(model.stacks, node) - stk = model.stacks[node] - println("TensorFlow error occured at:") - foreach(l -> println("$(l.file):$(l.line)"), stk) - end -end - -function runmodel(m::Model, args...) - @assert length(args) == length(m.inputs) - try - output = run(m.session, m.output, Dict(zip(m.inputs, args))) - ismultioutput(m) ? (batch.(output)...,) : batch(output) - catch e - isa(e, TensorFlow.TFException) || rethrow(e) - tferr(m, e) - rethrow(e) - end -end - -function (m::Model)(args::Batch...) 
- runmodel(m, args...) -end - -function (m::Model)(args...) - output = m(map(batchone, args)...) - ismultioutput(m) ? map(first, output) : first(output) -end - -for f in :[back!, update!].args - @eval function Flux.$f(m::Model, args...) - error($(string(f)) * " is not yet supported on TensorFlow models") - end -end - -import Juno: info - -function Flux.train!(m::Model, train, test=[]; epoch = 1, η = 0.1, - loss = (y, y′) -> reduce_sum((y - y′).^2)/2, - opt = TensorFlow.train.GradientDescentOptimizer(η)) - i = 0 - Y = placeholder(Float32) - Loss = loss(m.output, Y) - minimize_op = TensorFlow.train.minimize(opt, Loss) - for e in 1:epoch - info("Epoch $e\n") - @progress for (x, y) in train - y, cur_loss, _ = run(m.session, vcat(m.output, Loss, minimize_op), - Dict(m.inputs[1]=>batchone(x), Y=>batchone(y))) - if i % 5000 == 0 - @show y - @show accuracy(m, test) - end - i += 1 - end - end -end diff --git a/src/backend/tensorflow/recurrent.jl b/src/backend/tensorflow/recurrent.jl deleted file mode 100644 index 5abee520..00000000 --- a/src/backend/tensorflow/recurrent.jl +++ /dev/null @@ -1,83 +0,0 @@ -# TODO: refactor, some of this is more general than just the TF backend - -type SeqModel - m::Model - state::Any -end - -cgroup(xs...) = DataFlow.group(map(constant, xs)...) - -function makesession(model::Flux.Unrolled) - sess = Session(Graph()) - input = placeholder(Float32) - inputs = TensorFlow.unpack(input, num = model.steps, axis = 1) - let params, stacks, outputs, instates, outstates - if model.stateful - instates = [placeholder(Float32) for _ in model.state] - params, stacks, (outstates, outputs) = tograph(model, cgroup(instates...), cgroup(inputs...)) - else - params, stacks, outputs = tograph(model, cgroup(inputs...)) - end - output = TensorFlow.pack(outputs, axis = 1) - run(sess, initialize_all_variables()) - sess, params, stacks, (instates, input), (outstates, output) - end -end - -function tf(model::Flux.Unrolled) - sess, params, stacks, (instates, input), (outstates, output) = makesession(model) - SeqModel( - Model(model, sess, params, stacks, - [instates..., input], [outstates..., output]), - model.state) -end - -function batchseq(xs) - dims = ndims(xs)-2 - T = Array{eltype(xs),dims} - S = Array{eltype(xs),dims+1} - B = Array{eltype(xs),dims+2} - Batch{Seq{T,S},B}(xs) -end - -batchseq(xs::Batch) = batchseq(rawbatch(xs)) - -TensorFlow.get_tensors(x::Tuple) = TensorFlow.get_tensors(collect(x)) - -function (m::SeqModel)(x::BatchSeq) - m.m.model.stateful || return batchseq(runmodel(m.m, x)[end]) - if isempty(m.state) || length(first(m.state)) ≠ length(x) - m.state = batchone.(m.m.model.state) - end - output = runmodel(m.m, m.state..., x) - m.state, output = output[1:end-1], output[end] - return batchseq(output) -end - -(m::SeqModel)(x::Seq) = first(m(batchone(x))) - -function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1, - loss = (y, ŷ) -> -reduce_sum(y .* log(ŷ)), - opt = () -> TensorFlow.train.GradientDescentOptimizer(η)) - batchlen, seqlen = length(first(Xs)), length(first(Xs)[1]) - state = batchone.(m.m.model.state) - sess, params, stacks, (instates, input), (outstates, output) = makesession(m.m.model) - Y = placeholder(Float32) - Loss = loss(Y, output)/batchlen/seqlen - minimize_op = TensorFlow.train.minimize(opt(), Loss) - @progress "training" for e in 1:epoch - info("Epoch $e\n") - @progress "epoch" for (i, (x, y)) in enumerate(zip(Xs,Ys)) - out = run(sess, vcat(outstates..., output, Loss, minimize_op), - merge(Dict(input=>batchone(x), Y=>batchone(y)), - Dict(zip(instates, 
state)))) - state = out[1:length(state)] - loss = out[end-1] - isnan(loss) && error("Loss is NaN") - isinf(loss) && error("Loss is Inf") - (i-1) % 10 == 0 && @show loss - end - end - storeparams!(sess, params) - return -end diff --git a/src/backend/tensorflow/tensorflow.jl b/src/backend/tensorflow/tensorflow.jl deleted file mode 100644 index f2c27f2b..00000000 --- a/src/backend/tensorflow/tensorflow.jl +++ /dev/null @@ -1,21 +0,0 @@ -module TF - -using ..Flux, DataFlow, TensorFlow, Juno -import Flux: accuracy - -export tf - -type Op - f - shape -end - -Op(f) = Op(f, (d...) -> nothing) - -Flux.shape(op::Op, d...) = op.shape(d...) - -include("graph.jl") -include("model.jl") -include("recurrent.jl") - -end diff --git a/src/compiler/code.jl b/src/compiler/code.jl deleted file mode 100644 index 49c7dc65..00000000 --- a/src/compiler/code.jl +++ /dev/null @@ -1,81 +0,0 @@ -import DataFlow: mapconst, cse - -export @net, @ml - -function process_func(ex, params = []) - @capture(shortdef(ex), (args__,) -> body_) - body = @> body MacroTools.flatten liftloops graphm DataFlow.il - body = mapconst(x -> x in params ? :(self.$x) : x, body) - return args, body -end - -function makegraph(graph, args) - @assert length(args) == 1 - graph = prewalk(graph) do v - isa(value(v), Constant) && value(v).value == args[1] ? - inputnode(1) : - v - end - graph = map(graph) do x - isa(x, Offset) ? - :(Flux.Offset($(Expr(:quote, x.name)), $(x.n), self.$(x.name))) : - x - end - vertex(:(Flux.Frame(self)), graph) -end - -function build_type(T, params) - @esc T - ex = quote - type $T <: Model - $(params...) - end - end - if any(x->isexpr(x, Symbol), params) - push!(ex.args, - :($T($(map(x->isexpr(x, Symbol) ? :($x::AArray) : x, params)...)) = - $T($(map(x->isexpr(x, Symbol) ? :(param($x)) : namify(x), params)...)))) - end - ex -end - -import Lazy: groupby - -reifyparams(v::IVertex) = mapconst(x -> isa(x, Param) ? x.x : x, v) - -function process_type(ex) - @capture(ex, type T_ fs__ end) - @destruct [params = false || [], - funcs = true || []] = groupby(x->isexpr(x, :->, :function), fs) - @assert length(funcs) == 1 - pnames = namify.(params) - args, body = process_func(funcs[1], pnames) - @assert length(args) == 1 - self = esc(:self) - quote - $(build_type(T, params)) - $(esc(:(self::$T)))($(args...),) = interpret(reifyparams(graph($self)), $(args...)) - $(esc(:(Flux.update!(self::$T, η)))) = ($(map(p -> :(update!($self.$p, η)), pnames)...);) - $(esc(:(Flux.graph(self::$T)))) = $(DataFlow.constructor(mapconst(esc, makegraph(body, args)))) - nothing - end -end - -macro net(ex) - isexpr(ex, :type) ? process_type(ex) : - isexpr(ex, :->, :function) ? error("@net functions not implemented") : - error("Unsupported model expression $ex") -end - -function process_anon(ex) - args, body = process_func(ex) - @assert length(args) == 1 - :(Flux.Capacitor($(DataFlow.constructor(mapconst(esc, makegraph(body, args)))))) -end - -macro ml(ex) - @capture(shortdef(ex), ((xs__,) -> body_ ) | (f_(xs__,) = body_)) || - error("@ml requires a function definition") - ex = process_anon(:($(xs...,) -> $body)) - f == nothing ? ex : :($(esc(f)) = $ex) -end diff --git a/src/compiler/interp.jl b/src/compiler/interp.jl deleted file mode 100644 index 05572150..00000000 --- a/src/compiler/interp.jl +++ /dev/null @@ -1,26 +0,0 @@ -using DataFlow: mux, interpret, interpv, ituple, ilambda, iconst, Context - -function astuple(xs::Vertex) - isconstant(xs) && isa(value(xs).value, Tuple) ? value(xs).value : - isa(xs, Vertex) && value(xs) == tuple ? 
inputs(xs) : - nothing -end - -astuple(xs::Tuple) = xs - -astuple(xs) = nothing - -function astuples(xs) - xs = [astuple(x) for x in xs] - all(x->!(x==nothing), xs) ? xs : nothing -end - -function imap(cb, ctx, ::typeof(map), f, xs...) - f, xs = interpv(ctx, (f, xs)) - xs′ = astuples(xs) - xs′ ≠ nothing ? - group(map(f, xs′...)...) : - cb(ctx, map, constant(f), xs...) -end - -imap(f, args...) = f(args...) diff --git a/src/compiler/loops.jl b/src/compiler/loops.jl deleted file mode 100644 index e5a49f53..00000000 --- a/src/compiler/loops.jl +++ /dev/null @@ -1,134 +0,0 @@ -export unroll, unroll1 - -type Offset - name::Symbol - n::Int - default::Nullable{Param} -end - -Offset(name, n) = Offset(name, n, nothing) - -Base.:-(o::Offset) = Offset(o.name, -o.n, o.default) - -function liftloops(ex) - ex = DataFlow.normedges(ex) - decls = Dict() - ex = MacroTools.postwalk(ex) do ex - @capture(ex, x_{n_}) || return ex - haskey(decls, (x,n)) && return namify(decls[(x,n)]) - @gensym edge - decls[(x,n)] = :($edge = $(Offset(x,n))($x)) - edge - end - prepend!(ex.args, collect(values(decls))) - ex -end - -function hasloops(model) - g = graph(model) - g == nothing && return false - iscyclic(g) && return true - result = false - map(m -> hasloops(m) && (result = true), g) - return result -end - -function atomise(model) - postwalk(graph(model)) do v - hasloops(value(v)) || return v - spliceinputs(atomise(value(v)), inputs(v)...) - end -end - -function collect_state(v::IVertex) - state = typeof(v)[] - offset = Int[] - default = Param[] - prewalk!(v) do v - isa(value(v), Offset) || return v - if (i = findfirst(state, v[1])) == 0 - push!(state, v[1]) - push!(offset, max(0, -value(v).n)) - push!(default, get(value(v).default)) - else - offset[i] = max(offset[i], -value(v).n) - end - v - end - return state, offset, default -end - -hiddeninput(n) = vertex(Split(n), inputnode(1)) - -function create_steps(v::IVertex, n; seq = true, stateful = true) - [(stateful ? bumpinputs : copy)(seq ? spliceinputs(v, hiddeninput(i)) : v) for i = 1:n] -end - -function getvar(n, step, steps, offset, default; stateful = true) - if stateful && step < 1 - hiddeninput(sum(offset[1:n-1]) + 1 - step) - elseif step ∉ 1:length(steps) - constant(default[n]) - else - steps[step][1,n] - end -end - -function stateout(steps, offset, default) - outs = [] - defaults = [] - for i = 1:length(offset), j = 1:offset[i] - push!(outs, getvar(i, length(steps)-j+1, steps, offset, default)) - push!(defaults, default[i]) - end - group(outs...), defaults -end - -function unrollgraph(v::IVertex, n; seq = true, stateful = true) - state, offset, default = collect_state(v) - v = group(group(state...), v) - steps = create_steps(v, n, seq = seq, stateful = stateful) - for i = 1:n - vars = inputs(steps[i][1]) - postwalk!(steps[i]) do v - isa(value(v), Offset) || return v - varid = findfirst(vars,v[1]) - getvar(varid, value(v).n + i, steps, offset, default, stateful = stateful) - end - end - out = group(map(x->x[2], steps)...) - if stateful - state, defaults = stateout(steps, offset, default) - group(state,out), map(Flux.state, defaults) - else - out, [] - end -end - -unrollgraph(m, n; kws...) = unrollgraph(atomise(m), n; kws...) - -# TODO: perhaps split into SeqModel + StatefulModel -type Unrolled <: Model - model - graph::IVertex{Any} - state::Vector{Any} - stateful::Bool - steps::Int -end - -(m::Unrolled)(xs...) = interpret(reifyparams(m.graph), xs...) 
- -graph(u::Unrolled) = u.graph - -function unroll(model, n; seq = true, stateful = true) - graph, state = unrollgraph(model, n; seq = seq, stateful = stateful) - seq || stateful ? Unrolled(model, graph, state, stateful, n) : Capacitor(graph) -end - -function unroll1(model) - graph, state = unrollgraph(model, 1; seq = false) - graph = group(graph[1], map(x->x[1], inputs(graph)[2:end])...) - Unrolled(model, graph, state, false, 1) -end - -flip(model) = Capacitor(map(x -> isa(x, Offset) ? -x : x, atomise(model))) diff --git a/src/compiler/shape.jl b/src/compiler/shape.jl deleted file mode 100644 index 99d496c9..00000000 --- a/src/compiler/shape.jl +++ /dev/null @@ -1,48 +0,0 @@ -using DataFlow: ilinev, iargs, applylines, Line - -type Hint - typ -end - -DataFlow.tocall(h::Hint, x) = :($x::$(h.typ)) - -function gethint(v::IVertex) - while isa(value(v), Union{Line,Frame}) v = v[1] end - isa(value(v), Hint) && return value(v).typ - return -end - -ihint(f, ctx::Context, h::Hint, x) = vertex(h, x) -ihint(f, args...) = f(args...) - -hintify(c::Constant) = hintify(state(c.value)) -hintify(xs::AbstractArray) = vertex(Hint(size(xs)), constant(:_)) - -interpshape = mux(ilinev, ihint, iargs, ituple, hintify) - -function hintify(f, xs...) - sh = infer(f, map(gethint, xs)...) - sh ≠ nothing ? vertex(Hint(sh), vertex(f, xs...)) : - !any(x->x==nothing, xs) && graph(f) ≠ nothing ? interpret(Context(interpshape), graph(f), xs...) : - vertex(f, xs...) -end - -function shapesv(f, args...) - (g = graph(f)) == nothing && return - ins = [vertex(Hint(d), inputnode(i)) for (i,d) in enumerate(args)] - interpret(Context(interpshape), g, ins...) -end - -shapes(args...) = shapesv(args...) |> syntax |> applylines |> (x->prettify(x, lines=true)) - -# Inference primitives - -infer(f, args...) = graph(f) == nothing ? nothing : gethint(shapesv(f, args...)) - -function infer(::typeof(*), a::NTuple{2}, b::NTuple{2}) - a[2] == b[1] || return nothing - (a[1], b[2]) -end - -# TODO: make correct -infer(::typeof(+), a, b) = a diff --git a/src/cost.jl b/src/cost.jl deleted file mode 100644 index 34267202..00000000 --- a/src/cost.jl +++ /dev/null @@ -1,8 +0,0 @@ -export mse, mse! - -function mse!(Δ, pred, target) - map!(-, Δ, pred, target) - sumabs2(Δ)/2 -end - -mse(pred, target) = mse(similar(pred), pred, target) diff --git a/src/data.jl b/src/data.jl deleted file mode 100644 index be5497b6..00000000 --- a/src/data.jl +++ /dev/null @@ -1,36 +0,0 @@ -export onehot, onecold, chunk, partition, batches, sequences - -""" - onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false] - - onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.] - -Produce a one-hot-encoded version of an item, given a list of possible values -for the item. -""" -onehot(T::Type, label, labels) = T[i == label for i in labels] -onehot(label, labels) = onehot(Int, label, labels) - -""" - onecold([0.0, 1.0, 0.0, ...], - ['a', 'b', 'c', ...]) => 'b' - -The inverse of `onehot`; takes an output prediction vector and a list of -possible values, and produces the appropriate value. -""" -onecold(pred, labels = 1:length(pred)) = labels[findfirst(pred, maximum(pred))] - -using Iterators -import Iterators: partition - -export partition - -Base.length(l::Iterators.Partition) = length(l.xs) ÷ l.step - -_partition(r::UnitRange, step::Integer) = (step*(i-1)+1:step*i for i in 1:(r.stop÷step)) -_partition(xs, step) = (xs[i] for i in _partition(1:length(xs), step)) - -chunk(xs, n) = _partition(xs, length(xs)÷n) - -batches(xs...) 
= (Batch(x) for x in zip(xs...)) -sequences(xs, len) = (Seq(x) for x in partition(xs, len)) diff --git a/src/dims/batching.jl b/src/dims/batching.jl deleted file mode 100644 index 8faf50f6..00000000 --- a/src/dims/batching.jl +++ /dev/null @@ -1,21 +0,0 @@ -export Batch, batchone - -immutable Batch{T,S} <: AbstractVector{T} - data::CatMat{T,S} -end - -@forward Batch.data size, eltype, getindex, setindex!, rawbatch - -Batch(xs) = Batch(CatMat(xs)) - -convert{T,S}(::Type{Batch{T,S}},storage::S) = - Batch{T,S}(storage) - -batchone(x) = Batch((x,)) -batchone(x::Batch) = x - -@render Juno.Inline b::Batch begin - Tree(Row(Text("Batch of "), eltype(b), - Juno.fade("[$(length(b))]")), - Juno.trim(collect(b))) -end diff --git a/src/dims/catmat.jl b/src/dims/catmat.jl deleted file mode 100644 index 17592921..00000000 --- a/src/dims/catmat.jl +++ /dev/null @@ -1,50 +0,0 @@ -import Base: eltype, size, getindex, setindex!, convert - -export CatMat, rawbatch - -immutable CatMat{T,S} <: AbstractVector{T} - data::S -end - -convert{T,S}(::Type{CatMat{T,S}},storage::S) = - CatMat{T,S}(storage) - -eltype{T}(::CatMat{T}) = T - -size(b::CatMat) = (size(b.data, 1),) - -getindex(b::CatMat, i)::eltype(b) = slicedim(b.data, 1, i) - -setindex!(b::CatMat, v, i) = b[i, :] = v - -allequal(xs) = all(x -> x == first(xs), xs) - -function (::Type{CatMat{T,S}}){T,S}(xs, storage::S) - @assert @>> xs map(size) allequal - @assert size(storage) == (length(xs), size(first(xs))...) - for i = 1:length(xs) - storage[i, :] = xs[i] - end - return CatMat{T,S}(storage) -end - -function (::Type{CatMat{T}}){T}(xs) - xs′ = map(rawbatch, xs) - storage = similar(first(xs′), (length(xs′), size(first(xs′))...)) - CatMat{T,typeof(storage)}(xs′, storage) -end - -function CatMat(xs) - xs = promote(xs...) 
- CatMat{eltype(xs)}(xs) -end - -@render Juno.Inline b::CatMat begin - Tree(Row(Text("CatMat of "), eltype(b), - Juno.fade("[$(length(b))]")), - Juno.trim(collect(b))) -end - -rawbatch(xs) = xs - -rawbatch(xs::CatMat) = xs.data diff --git a/src/dims/seq.jl b/src/dims/seq.jl deleted file mode 100644 index 3b0d3d02..00000000 --- a/src/dims/seq.jl +++ /dev/null @@ -1,20 +0,0 @@ -export seq, Seq, BatchSeq - -immutable Seq{T,S} <: AbstractVector{T} - data::CatMat{T,S} -end - -@forward Seq.data size, eltype, getindex, setindex!, rawbatch - -Seq(xs) = Seq(CatMat(xs)) - -convert{T,S}(::Type{Seq{T,S}},storage::S) = - Seq{T,S}(storage) - -@render Juno.Inline b::Seq begin - Tree(Row(Text("Seq of "), eltype(b), - Juno.fade("[$(length(b))]")), - Juno.trim(collect(b))) -end - -typealias BatchSeq{T<:Seq} Batch{T} diff --git a/src/layers/activation.jl b/src/layers/activation.jl deleted file mode 100644 index 88cad04c..00000000 --- a/src/layers/activation.jl +++ /dev/null @@ -1,18 +0,0 @@ -export σ, relu, softmax, flatten - -σ(x) = 1 ./ (1 + exp.(-x)) - -back!(::typeof(σ), Δ, x) = Δ .* σ(x)./(1.-σ(x)) - -relu(x) = max(0, x) - -back!(::typeof(relu), Δ, x) = Δ .* (x .< 0) - -softmax(xs) = exp.(xs) ./ sum(exp.(xs)) - -flatten(xs) = reshape(xs, length(xs)) - -shape(::typeof(flatten), in) = prod(in) - -infer(::typeof(softmax), x) = x -infer(::typeof(σ), x) = x diff --git a/src/layers/affine.jl b/src/layers/affine.jl deleted file mode 100644 index 81fef977..00000000 --- a/src/layers/affine.jl +++ /dev/null @@ -1,20 +0,0 @@ -export Affine - -# TODO: type hints for parameters - -@net type Affine - W - b - x -> x*W + b -end - -Affine(in::Integer, out::Integer; init = initn) = - Affine(init(in, out), init(1, out)) - -@net type Sigmoid - layer::Model - x -> σ(layer(x)) -end - -Sigmoid(in::Integer, out::Integer; init = randn) = - Sigmoid(Affine(in, out, init = init)) diff --git a/src/layers/chain.jl b/src/layers/chain.jl deleted file mode 100644 index b4bd6ced..00000000 --- a/src/layers/chain.jl +++ /dev/null @@ -1,32 +0,0 @@ -export Chain - -function inferchain(ms) - chain = [] - sh = nothing - for m in ms - m = init(m, single(sh)) - sh = shape(m, sh) - push!(chain, m) - end - return chain, sh -end - -type Chain <: Model - layers::Vector{Any} - shape - function Chain(ms...) 
- ms, shape = inferchain(ms) - return new(ms, shape) - end -end - -@forward Chain.layers Base.getindex, Base.first, Base.last - -(s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers) -back!(s::Chain, Δ) = foldr((m, Δ) -> back!(m, Δ), Δ, s.layers) -update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers) - -graph(s::Chain) = - foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers) - -shape(c::Chain, in) = c.shape diff --git a/src/layers/recurrent.jl b/src/layers/recurrent.jl deleted file mode 100644 index e012a215..00000000 --- a/src/layers/recurrent.jl +++ /dev/null @@ -1,51 +0,0 @@ -export Recurrent, GatedRecurrent, LSTM - -@net type Recurrent - Wxy; Wyy; by - y - function (x) - y = tanh( x * Wxy + y{-1} * Wyy + by ) - end -end - -Recurrent(in, out; init = initn) = - Recurrent(init((in, out)), init((out, out)), init(out), init(out)) - -@net type GatedRecurrent - Wxr; Wyr; br - Wxu; Wyu; bu - Wxh; Wyh; bh - y - function (x) - reset = σ( x * Wxr + y{-1} * Wyr + br ) - update = σ( x * Wxu + y{-1} * Wyu + bu ) - y′ = tanh( x * Wxh + (reset .* y{-1}) * Wyh + bh ) - y = (1 .- update) .* y′ + update .* y{-1} - end -end - -GatedRecurrent(in, out; init = initn) = - GatedRecurrent(vcat([[init((in, out)), init((out, out)), init(out)] for _ = 1:3]...)..., - zeros(Float32, out)) - -@net type LSTM - Wxf; Wyf; bf - Wxi; Wyi; bi - Wxo; Wyo; bo - Wxc; Wyc; bc - y; state - function (x) - # Gates - forget = σ( x * Wxf + y{-1} * Wyf + bf ) - input = σ( x * Wxi + y{-1} * Wyi + bi ) - output = σ( x * Wxo + y{-1} * Wyo + bo ) - # State update and output - state′ = tanh( x * Wxc + y{-1} * Wyc + bc ) - state = forget .* state{-1} + input .* state′ - y = output .* tanh(state) - end -end - -LSTM(in, out; init = initn) = - LSTM(vcat([[init((in, out)), init((out, out)), init((1, out))] for _ = 1:4]...)..., - zeros(Float32, out), zeros(Float32, out)) diff --git a/src/layers/shape.jl b/src/layers/shape.jl deleted file mode 100644 index 5fe47cd9..00000000 --- a/src/layers/shape.jl +++ /dev/null @@ -1,47 +0,0 @@ -export Input - -typealias Dims{N} NTuple{N,Int} - -dims(d::Dims) = d - -dims(i...) = (i...,) - -single(i) = i -single(i::Dims) = length(i) == 1 ? first(i) : i - -# Shim for kicking off shape inference - -type ShapeError <: Exception - layer - shape -end - -type Input{N} <: Model - dims::Dims{N} -end - -Input(i...) = Input(dims(i...)) - -(::Input)(x) = x -back!(::Input, Δ, x) = Δ - -# Initialise placeholder - -type Init{F} - f::F -end - -init(i::Init, input...) = i.f(input...) -init(m, input...) 
= m - -# Shape inference API - -shape(x, in) = in - -shape(i::Input, _) = i.dims - -# Implementation for bundled layers - -shape(d::Affine, _) = length(state(d.b)) # TODO: could perhaps infer this - -Affine(out::Integer) = Init(in::Integer -> Affine(in, out)) diff --git a/src/layers/shims.jl b/src/layers/shims.jl deleted file mode 100644 index 8ffc4e1b..00000000 --- a/src/layers/shims.jl +++ /dev/null @@ -1,44 +0,0 @@ -export Conv2D, MaxPool, Reshape - -type Conv2D <: Model - filter::Param{Array{Float32,4}} # [height, width, inchans, outchans] - stride::Dims{2} -end - -Conv2D(size; in = 1, out = 1, stride = (1,1), init = initn) = - Conv2D(param(initn(size..., in, out)), stride) - -shape(c::Conv2D, in::Dims{2}) = - (map(i -> (in[i]-size(c.filter,i))÷c.stride[i]+1, (1,2))..., size(c.filter, 4)) - -shape(c::Conv2D, in::Dims{3}) = - shape(c, (in[1],in[2])) - -type MaxPool <: Model - size::Dims{2} - stride::Dims{2} -end - -MaxPool(size; stride = (1,1)) = - MaxPool(size, stride) - -shape(c::MaxPool, in::Dims{2}) = - map(i -> (in[i]-c.size[i])÷c.stride[i]+1, (1,2)) - -shape(c::MaxPool, in::Dims{3}) = - (shape(c, (in[1],in[2]))..., in[3]) - -shape(c::MaxPool, in) = throw(ShapeError(c, in)) - -immutable Reshape{N} - dims::Dims{N} -end - -Reshape(dims::Integer...) = Reshape(dims) - -function shape(r::Reshape, dims) - prod(dims) == prod(r.dims) || throw(ShapeError(r, dims)) - return r.dims -end - -shape(r::Reshape, ::Void) = r.dims diff --git a/src/model.jl b/src/model.jl deleted file mode 100644 index 9bca1e8c..00000000 --- a/src/model.jl +++ /dev/null @@ -1,111 +0,0 @@ -export Model, back!, update!, param - -# Basic model API - -""" - (m::Model)(X...) => Y - -A "model" is a function with state. For example, a logistic regression is the -function - - x -> σ(x * W + b) - -where `W` and `b` are a trainable matrix and vector of weights repectively. The -`Model` abstract type is used loosely; in general the concept of a model is -closer to a protocol, and models don't need to inherit from this type. Normal -Julia functions are models with 0 parameters, for example. -""" -abstract Model - -""" - back!(m::Model, ΔY, X...) => ΔX - -Backpropagate the gradient `ΔY` through the model `m`, accumulating the -gradients of any parameters. Returns the gradient of the input `X`. Gradients -may be arrays or tuples of arrays (for multiple inputs/outputs). -""" -back!(m::Model, Δ, xs...) = error("Backprop not implemented for $(typeof(m))") - -""" - update!(m::Model, η) => m - -Update the parameters of the model `m` using the accumulated gradients from -`back!`, using the learning rate `η`. -""" -update!(m, η) = m - -""" - graph(m::Model) => ::IVertex{Any} | nothing - -Returns the graph representation of the model, if any. Most models are built -from lower-level components and can simply implement this method to get most of -Flux's functionality. If this method isn't available, functionality like -backpropagation or conversion for backend must be implemented on a case-by-case -basis. Alternatively, one can implement this method and override individual -methods as necessary. -""" -graph(m) = nothing - -# Model parameters - -""" -A `Param` object stores a parameter array along with an accumulated delta to -that array. When converting to backends like TensorFlow, identical `Param`s will -result in identical variable objects, making model reuse trivial. -""" -type Param{T} - x::T - Δx::T -end - -""" - param(x::T) => ::Param{T} - -Convenience method for creating a `Param` object for a given array. 
-""" -param(x) = Param(x, zero(x)) - -state(p::Param) = p.x - -""" - accumulate!(p::Param, Δ) => p - -Accumulates the update `Δ` on `p`. The value of `p` won't change until -`update!`. -""" -function accumulate!(p::Param, Δ) - p.Δx += Δ - return p -end - -""" - update!(p::Param) - -Apply the accumulated updates to the value of the parameter. -""" -function update!(p::Param, η) - p.x .-= p.Δx .* η - p.Δx[:] = 0 - return p -end - -state(x) = x -accumulate!(x, Δ) = x - -@forward Param.x Base.size - -function Base.show(io::IO, p::Param) - print(io, "Param", size(p.x)) -end - -# Anonymous models - -export Capacitor - -type Capacitor <: Model - graph::IVertex{Any} -end - -(m::Capacitor)(xs...) = interpret(reifyparams(m.graph), xs...) - -graph(cap::Capacitor) = cap.graph diff --git a/src/utils.jl b/src/utils.jl deleted file mode 100644 index c685fd4e..00000000 --- a/src/utils.jl +++ /dev/null @@ -1,30 +0,0 @@ -export AArray - -const AArray = AbstractArray - -initn(dims...) = randn(Float32, dims...)/10 - -function train!(m, train, test = []; epoch = 1, batch = 10, η = 0.1) - i = 0 - Δ = zeros(length(train[1][2])) - for _ in 1:epoch - @progress for (x, y) in train - i += 1 - pred = m(x) - any(isnan, pred) && error("NaN") - err = mse!(Δ, pred, y) - back!(m, Δ, x) - i % batch == 0 && update!(m, η) - i % 1000 == 0 && @show accuracy(m, test) - end - end - return m -end - -function accuracy(m, data) - correct = 0 - for (x, y) in data - onecold(m(x)) == onecold(y) && (correct += 1) - end - return correct/length(data) -end diff --git a/test/backend.jl b/test/backend.jl deleted file mode 100644 index e69de29b..00000000 diff --git a/test/basic.jl b/test/basic.jl deleted file mode 100644 index 7209b36f..00000000 --- a/test/basic.jl +++ /dev/null @@ -1,10 +0,0 @@ -xs = randn(10)' # TODO: batching semantics - -d = Affine(10, 20) - -@test d(xs) == xs*d.W.x + d.b.x - -let - @capture(syntax(d), _Frame(_Line(x_[1] * W_ + b_))) - @test isa(x, Input) && isa(W, Param) && isa(b, Param) -end diff --git a/test/recurrent.jl b/test/recurrent.jl deleted file mode 100644 index e69de29b..00000000 diff --git a/test/runtests.jl b/test/runtests.jl deleted file mode 100644 index 810223ec..00000000 --- a/test/runtests.jl +++ /dev/null @@ -1,10 +0,0 @@ -using Flux, DataFlow, MacroTools, Base.Test -using Flux: graph, Param -using DataFlow: Input, Line, Frame - -syntax(v::Vertex) = prettify(DataFlow.syntax(v)) -syntax(x) = syntax(graph(x)) - -include("basic.jl") -include("recurrent.jl") -include("backend.jl")