remove all

This commit is contained in:
parent 1010b18084
commit dcbc77980e
.gitignore (vendored): 5 lines removed
@@ -1,5 +0,0 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
docs/build/
docs/site/
.travis.yml: 17 lines removed
@@ -1,17 +0,0 @@
# Documentation: http://docs.travis-ci.com/user/languages/julia/
language: julia
os:
  - linux
  - osx
julia:
  - 0.5
notifications:
  email: false
# uncomment the following lines to override the default test script
script:
  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
  - julia -e 'Pkg.clone("https://github.com/MikeInnes/DataFlow.jl")'
  - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)'
# after_success:
  - julia -e 'Pkg.add("Documenter")'
  - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))'
LICENSE.md: 22 lines removed
@@ -1,22 +0,0 @@
The Flux.jl package is licensed under the MIT "Expat" License:

> Copyright (c) 2016: Mike Innes.
>
> Permission is hereby granted, free of charge, to any person obtaining
> a copy of this software and associated documentation files (the
> "Software"), to deal in the Software without restriction, including
> without limitation the rights to use, copy, modify, merge, publish,
> distribute, sublicense, and/or sell copies of the Software, and to
> permit persons to whom the Software is furnished to do so, subject to
> the following conditions:
>
> The above copyright notice and this permission notice shall be
> included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md: 50 lines removed
@@ -1,50 +0,0 @@
# Флукс

[](https://travis-ci.org/MikeInnes/Flux.jl)

Flux is a high-level API for machine learning, implemented in Julia.

Flux aims to provide a concise and expressive syntax for architectures that are hard to express within other frameworks. The notation should be familiar and extremely close to what you'd find in a paper or description of the model.

The current focus is on ANNs with TensorFlow or MXNet as a backend. While it's in a very early working-prototype stage, you can see what works so far in the [examples folder](/examples).

## Brief Examples

Simple multi-layer-perceptron for MNIST:

```julia
Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)
```

LSTM example:

```julia
@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)
```
docs/make.jl: 17 lines removed
@@ -1,17 +0,0 @@
using Documenter, Flux

makedocs(modules=Module[Flux],
         doctest=false, clean=true,
         format = :html,
         sitename="Flux Documentation",
         pages = [
           "Home" => "index.md",
         ])

deploydocs(
  repo = "github.com/MikeInnes/Flux.jl.git",
  target = "build",
  osname = "linux",
  julia = "0.5",
  deps = nothing,
  make = nothing)
@@ -1 +0,0 @@
# Flux
@@ -1,22 +0,0 @@
using Flux, MNIST

data = [(Vector{Float32}(trainfeatures(i)), onehot(Float32, trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]

m = Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

# Convert to TensorFlow
model = tf(m)

# An example prediction pre-training
model(data[1][1])

@time Flux.train!(model, train, test, η = 1e-3)

# An example prediction post-training
model(data[1][1])
@@ -1,26 +0,0 @@
using Flux

# Traditional Approach

# 100 samples of sequences of 15 28×28 3-colour images
rand(100, 15, 28, 28, 3)

# Basic Batching

data = Batch([collect(reshape(9(i-1):9i-1, 3, 3)) for i = 1:10])

Batch(flatten.(data))

data |> structure

Batch(flatten.(data)) |> structure

# Nested Batching

# DNA sequence, encoded as a list of [A, T, G, C]
x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]])
x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]])

data = Batch([x1, x2])

data |> structure
@@ -1,38 +0,0 @@
using Flux
import StatsBase: wsample

nunroll = 50
nbatch = 50

getseqs(chars, alphabet) = sequences((onehot(Float32, char, alphabet) for char in chars), nunroll)
getbatches(chars, alphabet) = batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...)

input = readstring("$(homedir())/Downloads/shakespeare_input.txt")
alphabet = unique(input)
N = length(alphabet)

Xs, Ys = getbatches(input, alphabet), getbatches(input[2:end], alphabet)

model = Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)

m = tf(unroll(model, nunroll))

@time Flux.train!(m, Xs, Ys, η = 0.1, epoch = 1)

string(map(c -> onecold(c, alphabet), m(first(first(Xs))))...)

function sample(model, n, temp = 1)
  s = [rand(alphabet)]
  m = tf(unroll(model, 1))
  for i = 1:n
    push!(s, wsample(alphabet, softmax(m(Seq((onehot(Float32, s[end], alphabet),)))[1]./temp)))
  end
  return string(s...)
end

sample(model, 100)
@@ -1,70 +0,0 @@
using Flux, Juno

# Flux aims to provide high-level APIs that work well across backends, but in
# some cases you may want to take advantage of features specific to a given
# backend (or Flux may simply not have an implementation of that feature yet).
# In these cases it's easy to "drop down" and use the backend's API directly,
# where appropriate.

# In this example, both things are happening; firstly, Flux doesn't yet support
# ConvNets in the pure-Julia backend, but this is invisible thanks to the use of
# a simple "shim" type, `Conv2D`. This is provided by the library but could easily
# have been user-defined.

# Secondly, we want to take advantage of TensorFlow.jl's training process and
# optimisers. We can simply call `Tensor` exactly as we would on a regular
# TensorFlow model, and the rest of the process trivially follows
# TensorFlow.jl's usual API.

conv1 = Chain(
  Reshape(28,28,1),
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Input(12,12,20),
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = Chain(
  conv1, conv2, flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain TensorFlow, following
# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl
# (taking only the training and cost logic, not the graph building steps)

using TensorFlow, Distributions

include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl"))
loader = DataLoader()

session = Session(Graph())

x = placeholder(Float32)
y′ = placeholder(Float32)
y = Tensor(lenet, x)

cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2]))

train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy)

accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32))

run(session, initialize_all_variables())

@progress for i in 1:1000
  batch = next_batch(loader, 50)
  if i%100 == 1
    train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2]))
    info("step $i, training accuracy $train_accuracy")
  end
  run(session, train_step, Dict(x=>batch[1], y′=>batch[2]))
end

testx, testy = load_test_set()
test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy))
info("test accuracy $test_accuracy")
@@ -1,52 +0,0 @@
# Based on https://arxiv.org/abs/1409.0473

using Flux
using Flux: flip

# A recurrent model which takes a token and returns a context-dependent
# annotation.

@net type Encoder
  forward
  backward
  token -> hcat(forward(token), backward(token))
end

Encoder(in::Integer, out::Integer) =
  Encoder(LSTM(in, out÷2), flip(LSTM(in, out÷2)))

# A recurrent model which takes a sequence of annotations, attends, and returns
# a predicted output token.

@net type Decoder
  attend
  recur
  state; y; N
  function (anns)
    energies = map(ann -> exp(attend(hcat(state{-1}, ann))[1]), seq(anns, N))
    weights = energies./sum(energies)
    ctx = sum(map((α, ann) -> α .* ann, weights, anns))
    (_, state), y = recur((state{-1},y{-1}), ctx)
    y
  end
end

Decoder(in::Integer, out::Integer; N = 1) =
  Decoder(Affine(in+out, 1),
          unroll1(LSTM(in, out)),
          param(zeros(1, out)), param(zeros(1, out)), N)

# The model

Nalpha = 5 # The size of the input token vector
Nphrase = 7 # The length of (padded) phrases
Nhidden = 12 # The size of the hidden state

encode = Encoder(Nalpha, Nhidden)
decode = Chain(Decoder(Nhidden, Nhidden, N = Nphrase), Affine(Nhidden, Nalpha), softmax)

model = Chain(
  unroll(encode, Nphrase, stateful = false),
  unroll(decode, Nphrase, stateful = false, seq = false))

xs = Batch([Seq(rand(Float32, Nalpha) for _ = 1:Nphrase)])
src/Flux.jl: 36 lines removed
@@ -1,36 +0,0 @@
module Flux

using MacroTools, Lazy, DataFlow, Juno
using DataFlow: graphm, syntax, prewalk!, postwalk!, prewalk, postwalk,
  iscyclic, Constant, constant, isconstant, group, Split, splitnode,
  detuple, value, inputs, thread!, value, inputs, Split, splitnode, inputnode,
  spliceinputs, bumpinputs, Frame
using Juno: Tree, Row

# Zero Flux Given

include("model.jl")
include("utils.jl")
include("data.jl")

include("compiler/code.jl")
include("compiler/loops.jl")
include("compiler/interp.jl")
include("compiler/shape.jl")

include("layers/affine.jl")
include("layers/activation.jl")
include("layers/recurrent.jl")
include("layers/shape.jl")
include("layers/chain.jl")
include("layers/shims.jl")

include("dims/catmat.jl")
include("dims/batching.jl")
include("dims/seq.jl")

include("cost.jl")

include("backend/backend.jl")

end # module
@@ -1,11 +0,0 @@
export tf

function loadtf()
  isdefined(Flux, :TF) && return
  @eval include(joinpath(dirname($@__FILE__), "tensorflow/tensorflow.jl"))
end

function tf(args...)
  loadtf()
  TF.tf(args...)
end
@@ -1,72 +0,0 @@
using Base: @get!
using DataFlow: Constant, constant, Context, interpret, Split,
  interpv, ituple, ilambda, iconst, iline, stack, mux
using Flux: imap
using TensorFlow: RawTensor

# TODO: implement Julia's type promotion rules

node(x::Tuple) = map(node, x)
node(x::Tensor) = x
node(x::Variable) = x
node(x::Number) = TensorFlow.constant(Float32(x))

graph(::typeof(tuple), args...) = (args...,)
graph(s::Split, t::Tuple) = t[s.n]
graph(::typeof(softmax), x) = nn.softmax(x)
graph(::typeof(relu), x) = nn.relu(x)
graph(::typeof(σ), x) = nn.sigmoid(x)
graph(::typeof(hcat), xs...) = concat(1, xs)
graph(::typeof(seq), xs, n) = TensorFlow.unpack(xs, num = n, axis = 1)

for op in (tanh, *, .*, +, -)
  @eval graph(::typeof($op), args...) = $op(node(args)...)
end

graph(::typeof(.-), args...) = -(node(args)...)

# reshape hack due to https://github.com/malmaud/TensorFlow.jl/issues/79
batchsize(x::Tensor) = reduce_sum(slice(TensorFlow.shape(x), [0], [1]))
graph(::typeof(flatten), x) = reshape(x, pack([batchsize(x), Int32(-1)]))
graph(r::Reshape, x) = reshape(x, pack([batchsize(x), map(Int32, r.dims)...]))

graph(::Input, x) = x

graph(p::MaxPool, x) =
  nn.max_pool(x, [1, p.size..., 1], [1, p.stride..., 1], "VALID")

graph(op::Op, xs...) = op.f(xs...)

function graph(ctx::Context, model, args...)
  node = graph(model, interpv(ctx, args)...)
  isa(node, Tensor) && (ctx[:stacks][node.op.name] = stack(ctx))
  return node
end

interp(ctx, c::Conv2D, x) =
  nn.conv2d(interpv(ctx, x), interp(ctx, Constant(c.filter)), [1,c.stride...,1], "VALID")

interp{T<:AArray}(ctx, p::Constant{Flux.Param{T}}) =
  haskey(ctx[:params], p.value) ?
    ctx[:params][p.value] :
    (ctx[:params][p.value] = Variable(p.value.x))

interp(ctx, p::Constant) = p.value

function interp(ctx, model, args...)
  g = Flux.graph(model)
  g == nothing && return graph(ctx, model, args...)
  DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.")
  interpret(ctx, g, interpv(ctx, args)...)
end

function tograph(model, args...)
  ctx = Context(mux(iline, ilambda, ituple, imap, interp),
                params = ObjectIdDict(), stacks = Dict())
  out = interp(ctx, model, map(constant, args)...)
  return ctx[:params], ctx[:stacks], out
end

TensorFlow.Tensor(m::Flux.Model, args...) = tograph(m, args...)[2]

RawTensor(data::Union{Batch,Seq}) = RawTensor(rawbatch(data))
@@ -1,98 +0,0 @@
type Model
  model::Any
  session::Session
  params::Dict{Flux.Param,Tensor}
  stacks::Dict
  inputs::Vector{Tensor}
  output::Any
end

function makesession(model, inputs; session = Session(Graph()))
  params, stacks, output = tograph(model, inputs...)
  run(session, initialize_all_variables())
  Model(model, session, params, stacks, inputs, output)
end

function makesession(model, n::Integer; session = Session(Graph()))
  makesession(model, [placeholder(Float32) for _ = 1:n], session = session)
end

tf(model) = makesession(model, 1)

function storeparams!(sess, params)
  for (p, t) in params
    p.x = run(sess, t)
  end
end

storeparams!(m::Model) = storeparams!(m.session, m.params)

ismultioutput(m::Model) = !isa(m.output, Tensor)

function batch(xs)
  dims = ndims(xs)-1
  T = Array{eltype(xs),dims}
  B = Array{eltype(xs),dims+1}
  Batch{T,B}(xs)
end

function tferr(model::Model, e)
  m = match(r"Node: ([\w\d]+) =", string(e.status))
  m == nothing && return
  node = m.captures[1]
  if haskey(model.stacks, node)
    stk = model.stacks[node]
    println("TensorFlow error occurred at:")
    foreach(l -> println("$(l.file):$(l.line)"), stk)
  end
end

function runmodel(m::Model, args...)
  @assert length(args) == length(m.inputs)
  try
    output = run(m.session, m.output, Dict(zip(m.inputs, args)))
    ismultioutput(m) ? (batch.(output)...,) : batch(output)
  catch e
    isa(e, TensorFlow.TFException) || rethrow(e)
    tferr(m, e)
    rethrow(e)
  end
end

function (m::Model)(args::Batch...)
  runmodel(m, args...)
end

function (m::Model)(args...)
  output = m(map(batchone, args)...)
  ismultioutput(m) ? map(first, output) : first(output)
end

for f in :[back!, update!].args
  @eval function Flux.$f(m::Model, args...)
    error($(string(f)) * " is not yet supported on TensorFlow models")
  end
end

import Juno: info

function Flux.train!(m::Model, train, test=[]; epoch = 1, η = 0.1,
                     loss = (y, y′) -> reduce_sum((y - y′).^2)/2,
                     opt = TensorFlow.train.GradientDescentOptimizer(η))
  i = 0
  Y = placeholder(Float32)
  Loss = loss(m.output, Y)
  minimize_op = TensorFlow.train.minimize(opt, Loss)
  for e in 1:epoch
    info("Epoch $e\n")
    @progress for (x, y) in train
      y, cur_loss, _ = run(m.session, vcat(m.output, Loss, minimize_op),
                           Dict(m.inputs[1]=>batchone(x), Y=>batchone(y)))
      if i % 5000 == 0
        @show y
        @show accuracy(m, test)
      end
      i += 1
    end
  end
end
@@ -1,83 +0,0 @@
# TODO: refactor, some of this is more general than just the TF backend

type SeqModel
  m::Model
  state::Any
end

cgroup(xs...) = DataFlow.group(map(constant, xs)...)

function makesession(model::Flux.Unrolled)
  sess = Session(Graph())
  input = placeholder(Float32)
  inputs = TensorFlow.unpack(input, num = model.steps, axis = 1)
  let params, stacks, outputs, instates, outstates
    if model.stateful
      instates = [placeholder(Float32) for _ in model.state]
      params, stacks, (outstates, outputs) = tograph(model, cgroup(instates...), cgroup(inputs...))
    else
      params, stacks, outputs = tograph(model, cgroup(inputs...))
    end
    output = TensorFlow.pack(outputs, axis = 1)
    run(sess, initialize_all_variables())
    sess, params, stacks, (instates, input), (outstates, output)
  end
end

function tf(model::Flux.Unrolled)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(model)
  SeqModel(
    Model(model, sess, params, stacks,
          [instates..., input], [outstates..., output]),
    model.state)
end

function batchseq(xs)
  dims = ndims(xs)-2
  T = Array{eltype(xs),dims}
  S = Array{eltype(xs),dims+1}
  B = Array{eltype(xs),dims+2}
  Batch{Seq{T,S},B}(xs)
end

batchseq(xs::Batch) = batchseq(rawbatch(xs))

TensorFlow.get_tensors(x::Tuple) = TensorFlow.get_tensors(collect(x))

function (m::SeqModel)(x::BatchSeq)
  m.m.model.stateful || return batchseq(runmodel(m.m, x)[end])
  if isempty(m.state) || length(first(m.state)) ≠ length(x)
    m.state = batchone.(m.m.model.state)
  end
  output = runmodel(m.m, m.state..., x)
  m.state, output = output[1:end-1], output[end]
  return batchseq(output)
end

(m::SeqModel)(x::Seq) = first(m(batchone(x)))

function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1,
                     loss = (y, ŷ) -> -reduce_sum(y .* log(ŷ)),
                     opt = () -> TensorFlow.train.GradientDescentOptimizer(η))
  batchlen, seqlen = length(first(Xs)), length(first(Xs)[1])
  state = batchone.(m.m.model.state)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(m.m.model)
  Y = placeholder(Float32)
  Loss = loss(Y, output)/batchlen/seqlen
  minimize_op = TensorFlow.train.minimize(opt(), Loss)
  @progress "training" for e in 1:epoch
    info("Epoch $e\n")
    @progress "epoch" for (i, (x, y)) in enumerate(zip(Xs,Ys))
      out = run(sess, vcat(outstates..., output, Loss, minimize_op),
                merge(Dict(input=>batchone(x), Y=>batchone(y)),
                      Dict(zip(instates, state))))
      state = out[1:length(state)]
      loss = out[end-1]
      isnan(loss) && error("Loss is NaN")
      isinf(loss) && error("Loss is Inf")
      (i-1) % 10 == 0 && @show loss
    end
  end
  storeparams!(sess, params)
  return
end
@@ -1,21 +0,0 @@
module TF

using ..Flux, DataFlow, TensorFlow, Juno
import Flux: accuracy

export tf

type Op
  f
  shape
end

Op(f) = Op(f, (d...) -> nothing)

Flux.shape(op::Op, d...) = op.shape(d...)

include("graph.jl")
include("model.jl")
include("recurrent.jl")

end
@@ -1,81 +0,0 @@
import DataFlow: mapconst, cse

export @net, @ml

function process_func(ex, params = [])
  @capture(shortdef(ex), (args__,) -> body_)
  body = @> body MacroTools.flatten liftloops graphm DataFlow.il
  body = mapconst(x -> x in params ? :(self.$x) : x, body)
  return args, body
end

function makegraph(graph, args)
  @assert length(args) == 1
  graph = prewalk(graph) do v
    isa(value(v), Constant) && value(v).value == args[1] ?
      inputnode(1) :
      v
  end
  graph = map(graph) do x
    isa(x, Offset) ?
      :(Flux.Offset($(Expr(:quote, x.name)), $(x.n), self.$(x.name))) :
      x
  end
  vertex(:(Flux.Frame(self)), graph)
end

function build_type(T, params)
  @esc T
  ex = quote
    type $T <: Model
      $(params...)
    end
  end
  if any(x->isexpr(x, Symbol), params)
    push!(ex.args,
      :($T($(map(x->isexpr(x, Symbol) ? :($x::AArray) : x, params)...)) =
        $T($(map(x->isexpr(x, Symbol) ? :(param($x)) : namify(x), params)...))))
  end
  ex
end

import Lazy: groupby

reifyparams(v::IVertex) = mapconst(x -> isa(x, Param) ? x.x : x, v)

function process_type(ex)
  @capture(ex, type T_ fs__ end)
  @destruct [params = false || [],
             funcs = true || []] = groupby(x->isexpr(x, :->, :function), fs)
  @assert length(funcs) == 1
  pnames = namify.(params)
  args, body = process_func(funcs[1], pnames)
  @assert length(args) == 1
  self = esc(:self)
  quote
    $(build_type(T, params))
    $(esc(:(self::$T)))($(args...),) = interpret(reifyparams(graph($self)), $(args...))
    $(esc(:(Flux.update!(self::$T, η)))) = ($(map(p -> :(update!($self.$p, η)), pnames)...);)
    $(esc(:(Flux.graph(self::$T)))) = $(DataFlow.constructor(mapconst(esc, makegraph(body, args))))
    nothing
  end
end

macro net(ex)
  isexpr(ex, :type) ? process_type(ex) :
  isexpr(ex, :->, :function) ? error("@net functions not implemented") :
  error("Unsupported model expression $ex")
end

function process_anon(ex)
  args, body = process_func(ex)
  @assert length(args) == 1
  :(Flux.Capacitor($(DataFlow.constructor(mapconst(esc, makegraph(body, args))))))
end

macro ml(ex)
  @capture(shortdef(ex), ((xs__,) -> body_ ) | (f_(xs__,) = body_)) ||
    error("@ml requires a function definition")
  ex = process_anon(:($(xs...,) -> $body))
  f == nothing ? ex : :($(esc(f)) = $ex)
end
@@ -1,26 +0,0 @@
using DataFlow: mux, interpret, interpv, ituple, ilambda, iconst, Context

function astuple(xs::Vertex)
  isconstant(xs) && isa(value(xs).value, Tuple) ? value(xs).value :
  isa(xs, Vertex) && value(xs) == tuple ? inputs(xs) :
  nothing
end

astuple(xs::Tuple) = xs

astuple(xs) = nothing

function astuples(xs)
  xs = [astuple(x) for x in xs]
  all(x->!(x==nothing), xs) ? xs : nothing
end

function imap(cb, ctx, ::typeof(map), f, xs...)
  f, xs = interpv(ctx, (f, xs))
  xs′ = astuples(xs)
  xs′ ≠ nothing ?
    group(map(f, xs′...)...) :
    cb(ctx, map, constant(f), xs...)
end

imap(f, args...) = f(args...)
@@ -1,134 +0,0 @@
export unroll, unroll1

type Offset
  name::Symbol
  n::Int
  default::Nullable{Param}
end

Offset(name, n) = Offset(name, n, nothing)

Base.:-(o::Offset) = Offset(o.name, -o.n, o.default)

function liftloops(ex)
  ex = DataFlow.normedges(ex)
  decls = Dict()
  ex = MacroTools.postwalk(ex) do ex
    @capture(ex, x_{n_}) || return ex
    haskey(decls, (x,n)) && return namify(decls[(x,n)])
    @gensym edge
    decls[(x,n)] = :($edge = $(Offset(x,n))($x))
    edge
  end
  prepend!(ex.args, collect(values(decls)))
  ex
end

function hasloops(model)
  g = graph(model)
  g == nothing && return false
  iscyclic(g) && return true
  result = false
  map(m -> hasloops(m) && (result = true), g)
  return result
end

function atomise(model)
  postwalk(graph(model)) do v
    hasloops(value(v)) || return v
    spliceinputs(atomise(value(v)), inputs(v)...)
  end
end

function collect_state(v::IVertex)
  state = typeof(v)[]
  offset = Int[]
  default = Param[]
  prewalk!(v) do v
    isa(value(v), Offset) || return v
    if (i = findfirst(state, v[1])) == 0
      push!(state, v[1])
      push!(offset, max(0, -value(v).n))
      push!(default, get(value(v).default))
    else
      offset[i] = max(offset[i], -value(v).n)
    end
    v
  end
  return state, offset, default
end

hiddeninput(n) = vertex(Split(n), inputnode(1))

function create_steps(v::IVertex, n; seq = true, stateful = true)
  [(stateful ? bumpinputs : copy)(seq ? spliceinputs(v, hiddeninput(i)) : v) for i = 1:n]
end

function getvar(n, step, steps, offset, default; stateful = true)
  if stateful && step < 1
    hiddeninput(sum(offset[1:n-1]) + 1 - step)
  elseif step ∉ 1:length(steps)
    constant(default[n])
  else
    steps[step][1,n]
  end
end

function stateout(steps, offset, default)
  outs = []
  defaults = []
  for i = 1:length(offset), j = 1:offset[i]
    push!(outs, getvar(i, length(steps)-j+1, steps, offset, default))
    push!(defaults, default[i])
  end
  group(outs...), defaults
end

function unrollgraph(v::IVertex, n; seq = true, stateful = true)
  state, offset, default = collect_state(v)
  v = group(group(state...), v)
  steps = create_steps(v, n, seq = seq, stateful = stateful)
  for i = 1:n
    vars = inputs(steps[i][1])
    postwalk!(steps[i]) do v
      isa(value(v), Offset) || return v
      varid = findfirst(vars,v[1])
      getvar(varid, value(v).n + i, steps, offset, default, stateful = stateful)
    end
  end
  out = group(map(x->x[2], steps)...)
  if stateful
    state, defaults = stateout(steps, offset, default)
    group(state,out), map(Flux.state, defaults)
  else
    out, []
  end
end

unrollgraph(m, n; kws...) = unrollgraph(atomise(m), n; kws...)

# TODO: perhaps split into SeqModel + StatefulModel
type Unrolled <: Model
  model
  graph::IVertex{Any}
  state::Vector{Any}
  stateful::Bool
  steps::Int
end

(m::Unrolled)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(u::Unrolled) = u.graph

function unroll(model, n; seq = true, stateful = true)
  graph, state = unrollgraph(model, n; seq = seq, stateful = stateful)
  seq || stateful ? Unrolled(model, graph, state, stateful, n) : Capacitor(graph)
end

function unroll1(model)
  graph, state = unrollgraph(model, 1; seq = false)
  graph = group(graph[1], map(x->x[1], inputs(graph)[2:end])...)
  Unrolled(model, graph, state, false, 1)
end

flip(model) = Capacitor(map(x -> isa(x, Offset) ? -x : x, atomise(model)))
@@ -1,48 +0,0 @@
using DataFlow: ilinev, iargs, applylines, Line

type Hint
  typ
end

DataFlow.tocall(h::Hint, x) = :($x::$(h.typ))

function gethint(v::IVertex)
  while isa(value(v), Union{Line,Frame}) v = v[1] end
  isa(value(v), Hint) && return value(v).typ
  return
end

ihint(f, ctx::Context, h::Hint, x) = vertex(h, x)
ihint(f, args...) = f(args...)

hintify(c::Constant) = hintify(state(c.value))
hintify(xs::AbstractArray) = vertex(Hint(size(xs)), constant(:_))

interpshape = mux(ilinev, ihint, iargs, ituple, hintify)

function hintify(f, xs...)
  sh = infer(f, map(gethint, xs)...)
  sh ≠ nothing ? vertex(Hint(sh), vertex(f, xs...)) :
    !any(x->x==nothing, xs) && graph(f) ≠ nothing ? interpret(Context(interpshape), graph(f), xs...) :
    vertex(f, xs...)
end

function shapesv(f, args...)
  (g = graph(f)) == nothing && return
  ins = [vertex(Hint(d), inputnode(i)) for (i,d) in enumerate(args)]
  interpret(Context(interpshape), g, ins...)
end

shapes(args...) = shapesv(args...) |> syntax |> applylines |> (x->prettify(x, lines=true))

# Inference primitives

infer(f, args...) = graph(f) == nothing ? nothing : gethint(shapesv(f, args...))

function infer(::typeof(*), a::NTuple{2}, b::NTuple{2})
  a[2] == b[1] || return nothing
  (a[1], b[2])
end

# TODO: make correct
infer(::typeof(+), a, b) = a
@@ -1,8 +0,0 @@
export mse, mse!

function mse!(Δ, pred, target)
  map!(-, Δ, pred, target)
  sumabs2(Δ)/2
end

mse(pred, target) = mse!(similar(pred), pred, target)
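For reference, a minimal usage sketch of the cost helpers above (illustrative only, not part of the removed file): the caller supplies a preallocated buffer `Δ`, which `mse!` fills with `pred - target` before returning half the summed squared error; `train!` in src/utils.jl reuses the same buffer across iterations.

```julia
# Hedged usage sketch for the removed cost helpers (Julia 0.5-era code).
pred   = [0.25, 0.70, 0.05]   # model output
target = [0.0, 1.0, 0.0]      # one-hot label
Δ = zeros(length(target))     # reusable gradient buffer

loss = mse!(Δ, pred, target)  # Δ now holds pred .- target; loss == sumabs2(Δ)/2
```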
src/data.jl: 36 lines removed
@@ -1,36 +0,0 @@
export onehot, onecold, chunk, partition, batches, sequences

"""
    onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false]

    onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.]

Produce a one-hot-encoded version of an item, given a list of possible values
for the item.
"""
onehot(T::Type, label, labels) = T[i == label for i in labels]
onehot(label, labels) = onehot(Int, label, labels)

"""
    onecold([0.0, 1.0, 0.0, ...],
            ['a', 'b', 'c', ...]) => 'b'

The inverse of `onehot`; takes an output prediction vector and a list of
possible values, and produces the appropriate value.
"""
onecold(pred, labels = 1:length(pred)) = labels[findfirst(pred, maximum(pred))]

using Iterators
import Iterators: partition

export partition

Base.length(l::Iterators.Partition) = length(l.xs) ÷ l.step

_partition(r::UnitRange, step::Integer) = (step*(i-1)+1:step*i for i in 1:(r.stop÷step))
_partition(xs, step) = (xs[i] for i in _partition(1:length(xs), step))

chunk(xs, n) = _partition(xs, length(xs)÷n)

batches(xs...) = (Batch(x) for x in zip(xs...))
sequences(xs, len) = (Seq(x) for x in partition(xs, len))
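The encoding helpers above are easiest to read with concrete values. The following examples are an illustrative sketch based on the docstrings and definitions in this file, not part of the commit:

```julia
# Hedged examples of the removed data utilities (Julia 0.5-era code).
onehot('b', ['a', 'b', 'c', 'd'])          # => [0, 1, 0, 0] (Int by default)
onehot(Float32, 'c', ['a', 'b', 'c', 'd']) # => Float32[0, 0, 1, 0]

onecold([0.1, 0.8, 0.1], ['a', 'b', 'c'])  # => 'b', the label at the maximum

collect(chunk(1:10, 2))                    # => [1:5, 6:10], two chunks of five
```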
@@ -1,21 +0,0 @@
export Batch, batchone

immutable Batch{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Batch.data size, eltype, getindex, setindex!, rawbatch

Batch(xs) = Batch(CatMat(xs))

convert{T,S}(::Type{Batch{T,S}},storage::S) =
  Batch{T,S}(storage)

batchone(x) = Batch((x,))
batchone(x::Batch) = x

@render Juno.Inline b::Batch begin
  Tree(Row(Text("Batch of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end
@@ -1,50 +0,0 @@
import Base: eltype, size, getindex, setindex!, convert

export CatMat, rawbatch

immutable CatMat{T,S} <: AbstractVector{T}
  data::S
end

convert{T,S}(::Type{CatMat{T,S}},storage::S) =
  CatMat{T,S}(storage)

eltype{T}(::CatMat{T}) = T

size(b::CatMat) = (size(b.data, 1),)

getindex(b::CatMat, i)::eltype(b) = slicedim(b.data, 1, i)

setindex!(b::CatMat, v, i) = b[i, :] = v

allequal(xs) = all(x -> x == first(xs), xs)

function (::Type{CatMat{T,S}}){T,S}(xs, storage::S)
  @assert @>> xs map(size) allequal
  @assert size(storage) == (length(xs), size(first(xs))...)
  for i = 1:length(xs)
    storage[i, :] = xs[i]
  end
  return CatMat{T,S}(storage)
end

function (::Type{CatMat{T}}){T}(xs)
  xs′ = map(rawbatch, xs)
  storage = similar(first(xs′), (length(xs′), size(first(xs′))...))
  CatMat{T,typeof(storage)}(xs′, storage)
end

function CatMat(xs)
  xs = promote(xs...)
  CatMat{eltype(xs)}(xs)
end

@render Juno.Inline b::CatMat begin
  Tree(Row(Text("CatMat of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end

rawbatch(xs) = xs

rawbatch(xs::CatMat) = xs.data
@@ -1,20 +0,0 @@
export seq, Seq, BatchSeq

immutable Seq{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Seq.data size, eltype, getindex, setindex!, rawbatch

Seq(xs) = Seq(CatMat(xs))

convert{T,S}(::Type{Seq{T,S}},storage::S) =
  Seq{T,S}(storage)

@render Juno.Inline b::Seq begin
  Tree(Row(Text("Seq of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end

typealias BatchSeq{T<:Seq} Batch{T}
@@ -1,18 +0,0 @@
export σ, relu, softmax, flatten

σ(x) = 1 ./ (1 + exp.(-x))

back!(::typeof(σ), Δ, x) = Δ .* σ(x) .* (1 .- σ(x))

relu(x) = max(0, x)

back!(::typeof(relu), Δ, x) = Δ .* (x .> 0)

softmax(xs) = exp.(xs) ./ sum(exp.(xs))

flatten(xs) = reshape(xs, length(xs))

shape(::typeof(flatten), in) = prod(in)

infer(::typeof(softmax), x) = x
infer(::typeof(σ), x) = x
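As a quick numeric check of the definitions above (an illustrative sketch, not part of the removed file):

```julia
# Hedged numeric examples of the removed activations.
σ(0.0)                         # => 0.5
relu(-2.0), relu(3.0)          # => (0.0, 3.0)
sum(softmax([1.0, 2.0, 3.0]))  # => 1.0, softmax outputs are normalised
```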
@@ -1,20 +0,0 @@
export Affine

# TODO: type hints for parameters

@net type Affine
  W
  b
  x -> x*W + b
end

Affine(in::Integer, out::Integer; init = initn) =
  Affine(init(in, out), init(1, out))

@net type Sigmoid
  layer::Model
  x -> σ(layer(x))
end

Sigmoid(in::Integer, out::Integer; init = randn) =
  Sigmoid(Affine(in, out, init = init))
@@ -1,32 +0,0 @@
export Chain

function inferchain(ms)
  chain = []
  sh = nothing
  for m in ms
    m = init(m, single(sh))
    sh = shape(m, sh)
    push!(chain, m)
  end
  return chain, sh
end

type Chain <: Model
  layers::Vector{Any}
  shape
  function Chain(ms...)
    ms, shape = inferchain(ms)
    return new(ms, shape)
  end
end

@forward Chain.layers Base.getindex, Base.first, Base.last

(s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers)
back!(s::Chain, Δ) = foldr((m, Δ) -> back!(m, Δ), Δ, s.layers)
update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers)

graph(s::Chain) =
  foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers)

shape(c::Chain, in) = c.shape
@@ -1,51 +0,0 @@
export Recurrent, GatedRecurrent, LSTM

@net type Recurrent
  Wxy; Wyy; by
  y
  function (x)
    y = tanh( x * Wxy + y{-1} * Wyy + by )
  end
end

Recurrent(in, out; init = initn) =
  Recurrent(init((in, out)), init((out, out)), init(out), init(out))

@net type GatedRecurrent
  Wxr; Wyr; br
  Wxu; Wyu; bu
  Wxh; Wyh; bh
  y
  function (x)
    reset = σ( x * Wxr + y{-1} * Wyr + br )
    update = σ( x * Wxu + y{-1} * Wyu + bu )
    y′ = tanh( x * Wxh + (reset .* y{-1}) * Wyh + bh )
    y = (1 .- update) .* y′ + update .* y{-1}
  end
end

GatedRecurrent(in, out; init = initn) =
  GatedRecurrent(vcat([[init((in, out)), init((out, out)), init(out)] for _ = 1:3]...)...,
                 zeros(Float32, out))

@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

LSTM(in, out; init = initn) =
  LSTM(vcat([[init((in, out)), init((out, out)), init((1, out))] for _ = 1:4]...)...,
       zeros(Float32, out), zeros(Float32, out))
@@ -1,47 +0,0 @@
export Input

typealias Dims{N} NTuple{N,Int}

dims(d::Dims) = d

dims(i...) = (i...,)

single(i) = i
single(i::Dims) = length(i) == 1 ? first(i) : i

# Shim for kicking off shape inference

type ShapeError <: Exception
  layer
  shape
end

type Input{N} <: Model
  dims::Dims{N}
end

Input(i...) = Input(dims(i...))

(::Input)(x) = x
back!(::Input, Δ, x) = Δ

# Initialise placeholder

type Init{F}
  f::F
end

init(i::Init, input...) = i.f(input...)
init(m, input...) = m

# Shape inference API

shape(x, in) = in

shape(i::Input, _) = i.dims

# Implementation for bundled layers

shape(d::Affine, _) = length(state(d.b)) # TODO: could perhaps infer this

Affine(out::Integer) = Init(in::Integer -> Affine(in, out))
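The `Init` placeholder above is what lets the README write `Affine(128)` without an input size: the one-argument `Affine` constructor returns an `Init` closure, and `Chain`'s `inferchain` later calls `init(m, single(sh))` with the shape propagated from the previous layer. A hedged sketch of that flow, assuming the definitions above:

```julia
# Hedged sketch of deferred initialisation via the Init placeholder.
layer = Affine(128)    # an Init closure; no weights allocated yet
m = init(layer, 784)   # what inferchain does once the input size is known
shape(m, 784)          # => 128, read back from the length of m.b
```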
@@ -1,44 +0,0 @@
export Conv2D, MaxPool, Reshape

type Conv2D <: Model
  filter::Param{Array{Float32,4}} # [height, width, inchans, outchans]
  stride::Dims{2}
end

Conv2D(size; in = 1, out = 1, stride = (1,1), init = initn) =
  Conv2D(param(initn(size..., in, out)), stride)

shape(c::Conv2D, in::Dims{2}) =
  (map(i -> (in[i]-size(c.filter,i))÷c.stride[i]+1, (1,2))..., size(c.filter, 4))

shape(c::Conv2D, in::Dims{3}) =
  shape(c, (in[1],in[2]))

type MaxPool <: Model
  size::Dims{2}
  stride::Dims{2}
end

MaxPool(size; stride = (1,1)) =
  MaxPool(size, stride)

shape(c::MaxPool, in::Dims{2}) =
  map(i -> (in[i]-c.size[i])÷c.stride[i]+1, (1,2))

shape(c::MaxPool, in::Dims{3}) =
  (shape(c, (in[1],in[2]))..., in[3])

shape(c::MaxPool, in) = throw(ShapeError(c, in))

immutable Reshape{N}
  dims::Dims{N}
end

Reshape(dims::Integer...) = Reshape(dims)

function shape(r::Reshape, dims)
  prod(dims) == prod(r.dims) || throw(ShapeError(r, dims))
  return r.dims
end

shape(r::Reshape, ::Void) = r.dims
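These shape formulas are what make the LeNet example's `Input(12,12,20)` line up with the first convolutional block. A worked example (mine, following the definitions above) for a 28×28 greyscale input:

```julia
# Hedged shape-inference walkthrough for the shims above.
c = Conv2D((5,5), out = 20)        # 5×5 filters, 20 output channels, stride (1,1)
p = MaxPool((2,2), stride = (2,2))

shape(c, (28, 28))      # => (24, 24, 20), since (28 - 5) ÷ 1 + 1 == 24
shape(p, (24, 24, 20))  # => (12, 12, 20), since (24 - 2) ÷ 2 + 1 == 12
```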
src/model.jl: 111 lines removed
@@ -1,111 +0,0 @@
export Model, back!, update!, param

# Basic model API

"""
    (m::Model)(X...) => Y

A "model" is a function with state. For example, a logistic regression is the
function

    x -> σ(x * W + b)

where `W` and `b` are a trainable matrix and vector of weights respectively. The
`Model` abstract type is used loosely; in general the concept of a model is
closer to a protocol, and models don't need to inherit from this type. Normal
Julia functions are models with 0 parameters, for example.
"""
abstract Model

"""
    back!(m::Model, ΔY, X...) => ΔX

Backpropagate the gradient `ΔY` through the model `m`, accumulating the
gradients of any parameters. Returns the gradient of the input `X`. Gradients
may be arrays or tuples of arrays (for multiple inputs/outputs).
"""
back!(m::Model, Δ, xs...) = error("Backprop not implemented for $(typeof(m))")

"""
    update!(m::Model, η) => m

Update the parameters of the model `m` using the accumulated gradients from
`back!`, using the learning rate `η`.
"""
update!(m, η) = m

"""
    graph(m::Model) => ::IVertex{Any} | nothing

Returns the graph representation of the model, if any. Most models are built
from lower-level components and can simply implement this method to get most of
Flux's functionality. If this method isn't available, functionality like
backpropagation or conversion for backend must be implemented on a case-by-case
basis. Alternatively, one can implement this method and override individual
methods as necessary.
"""
graph(m) = nothing

# Model parameters

"""
A `Param` object stores a parameter array along with an accumulated delta to
that array. When converting to backends like TensorFlow, identical `Param`s will
result in identical variable objects, making model reuse trivial.
"""
type Param{T}
  x::T
  Δx::T
end

"""
    param(x::T) => ::Param{T}

Convenience method for creating a `Param` object for a given array.
"""
param(x) = Param(x, zero(x))

state(p::Param) = p.x

"""
    accumulate!(p::Param, Δ) => p

Accumulates the update `Δ` on `p`. The value of `p` won't change until
`update!`.
"""
function accumulate!(p::Param, Δ)
  p.Δx += Δ
  return p
end

"""
    update!(p::Param)

Apply the accumulated updates to the value of the parameter.
"""
function update!(p::Param, η)
  p.x .-= p.Δx .* η
  p.Δx[:] = 0
  return p
end

state(x) = x
accumulate!(x, Δ) = x

@forward Param.x Base.size

function Base.show(io::IO, p::Param)
  print(io, "Param", size(p.x))
end

# Anonymous models

export Capacitor

type Capacitor <: Model
  graph::IVertex{Any}
end

(m::Capacitor)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(cap::Capacitor) = cap.graph
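The docstrings above describe `Model` as a protocol rather than a base type. As an illustrative sketch (not part of the commit), a hand-written layer only needs a call method plus `back!`/`update!` defined over its `Param`s:

```julia
# Hedged sketch of a layer implementing the Model protocol by hand (Julia 0.5 syntax).
type Scale <: Model
  w::Param{Vector{Float64}}
end

Scale(n::Integer) = Scale(param(ones(n)))

(m::Scale)(x) = m.w.x .* x        # forward pass

function back!(m::Scale, Δ, x)
  accumulate!(m.w, Δ .* x)        # gradient w.r.t. the parameter
  return Δ .* m.w.x               # gradient w.r.t. the input
end

update!(m::Scale, η) = (update!(m.w, η); m)
```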
src/utils.jl: 30 lines removed
@@ -1,30 +0,0 @@
export AArray

const AArray = AbstractArray

initn(dims...) = randn(Float32, dims...)/10

function train!(m, train, test = []; epoch = 1, batch = 10, η = 0.1)
  i = 0
  Δ = zeros(length(train[1][2]))
  for _ in 1:epoch
    @progress for (x, y) in train
      i += 1
      pred = m(x)
      any(isnan, pred) && error("NaN")
      err = mse!(Δ, pred, y)
      back!(m, Δ, x)
      i % batch == 0 && update!(m, η)
      i % 1000 == 0 && @show accuracy(m, test)
    end
  end
  return m
end

function accuracy(m, data)
  correct = 0
  for (x, y) in data
    onecold(m(x)) == onecold(y) && (correct += 1)
  end
  return correct/length(data)
end
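Since the docstring in src/model.jl notes that plain functions count as models, `accuracy` can be exercised without any trained weights; a hedged sketch with hypothetical predictions (not from the commit):

```julia
# Hedged sketch of the accuracy helper above.
data = [([0.1, 0.8, 0.1], [0, 1, 0]),    # predicted 2, labelled 2: correct
        ([0.7, 0.2, 0.1], [0, 0, 1])]    # predicted 1, labelled 3: wrong
passthrough(x) = x                       # stand-in "model" that returns its input
accuracy(passthrough, data)              # => 0.5
```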
@@ -1,10 +0,0 @@
xs = randn(10)' # TODO: batching semantics

d = Affine(10, 20)

@test d(xs) == xs*d.W.x + d.b.x

let
  @capture(syntax(d), _Frame(_Line(x_[1] * W_ + b_)))
  @test isa(x, Input) && isa(W, Param) && isa(b, Param)
end
@@ -1,10 +0,0 @@
using Flux, DataFlow, MacroTools, Base.Test
using Flux: graph, Param
using DataFlow: Input, Line, Frame

syntax(v::Vertex) = prettify(DataFlow.syntax(v))
syntax(x) = syntax(graph(x))

include("basic.jl")
include("recurrent.jl")
include("backend.jl")