diff --git a/README.md b/README.md
index 0171625c..020238c3 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,9 @@ We can describe simple models through a convenient interface:
 ```julia
 m = Chain(
   Input(784),
-  Dense(128), relu,
-  Dense( 64), relu,
-  Dense( 10), softmax)
+  Affine(128), relu,
+  Affine( 64), relu,
+  Affine( 10), softmax)
 ```
 
 Models are simple functions with state, so we can immediately see what the network does:
@@ -30,7 +30,7 @@ Models are simple functions with state, so we can immediately see what the netwo
 m(randn(784)) #> [0.101, 0.101, 0.099, 0.100, ...]
 ```
 
-What if we need a custom layer? Here's one equivalent to `Dense` above:
+What if we need a custom layer? Here's one equivalent to `Affine` above:
 
 ```julia
 # Simple Julia type with two fields – @net defines some extra methods like the
@@ -55,10 +55,10 @@ We can already insert this model into combining models like `Chain`. If you want
   x -> σ(layer(x))
 end
 
-Perceptron(in, out) = Perceptron(Dense(in, out))
+Perceptron(in, out) = Perceptron(Affine(in, out))
 ```
 
-This defines a simple perceptron layer which we can use in the same way as `Dense` above. We can draw arbitrary graphs, including those with splits, combines or recurrences, in a fully declarative way *[this API is a WIP]*:
+This defines a simple perceptron layer which we can use in the same way as `Affine` above. We can draw arbitrary graphs, including those with splits, combines or recurrences, in a fully declarative way *[this API is a WIP]*:
 
 ```julia
 @net type SimpleRecurrent
@@ -82,7 +82,7 @@ end
 end
 ```
 
-Though further from the equations, this has the advantage of further reuse and customizability. For example, `layer` could be a simple `Dense(x, y)` as before or it could be a `Dropout(Dense(x, y))` in order to add dropout to the recurrent layer.
+Though further from the equations, this has the advantage of further reuse and customizability. For example, `layer` could be a simple `Affine(x, y)` as before or it could be a `Dropout(Affine(x, y))` in order to add dropout to the recurrent layer.
 
 When it comes time to train the model, we have a number of options for tweaking its implementation, like the backend used or unrolling settings. In Flux this is as simple as calling some functions on the original model:
 
diff --git a/examples/MNIST.jl b/examples/MNIST.jl
index beaf524f..70d26ee1 100644
--- a/examples/MNIST.jl
+++ b/examples/MNIST.jl
@@ -6,9 +6,9 @@ test = data[50_001:60_000]
 
 m = Chain(
   Input(784),
-  Dense(128), relu,
-  Dense( 64), relu,
-  Dense( 10), softmax)
+  Affine(128), relu,
+  Affine( 64), relu,
+  Affine( 10), softmax)
 
 # Convert to TensorFlow
 model = tf(m)
diff --git a/examples/char-rnn.jl b/examples/char-rnn.jl
index 27f8f633..9ce0e3b0 100644
--- a/examples/char-rnn.jl
+++ b/examples/char-rnn.jl
@@ -14,7 +14,7 @@ model = Chain(
   Input(N),
   LSTM(N, 256),
   LSTM(256, 256),
-  Dense(256, N),
+  Affine(256, N),
   softmax)
 
 m = tf(unroll(model, 50));
diff --git a/examples/integration.jl b/examples/integration.jl
index e1dcc67f..704446e8 100644
--- a/examples/integration.jl
+++ b/examples/integration.jl
@@ -28,8 +28,8 @@ conv2 = Chain(
 lenet = Chain(
   conv1,
   conv2,
   flatten,
-  Dense(500), tanh,
-  Dense(10), softmax)
+  Affine(500), tanh,
+  Affine(10), softmax)
 
 #--------------------------------------------------------------------------------
diff --git a/src/Flux.jl b/src/Flux.jl
index 67b940a2..75a37ba9 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -17,7 +17,7 @@ include("compiler/diff.jl")
 include("compiler/code.jl")
 include("compiler/loops.jl")
 
-include("layers/dense.jl")
+include("layers/affine.jl")
 include("layers/recurrent.jl")
 include("layers/shape.jl")
 include("layers/chain.jl")
diff --git a/src/compiler/loops.jl b/src/compiler/loops.jl
index 449ce66f..03ba463d 100644
--- a/src/compiler/loops.jl
+++ b/src/compiler/loops.jl
@@ -54,8 +54,8 @@ end
 
 hiddeninput(n) = vertex(Split(n), inputnode(1))
 
-function create_steps(v::IVertex, n)
-  [bumpinputs(spliceinputs(v, hiddeninput(i))) for i = 1:n]
+function create_steps(v::IVertex, n; seq = true)
+  [bumpinputs(seq ? spliceinputs(v, hiddeninput(i)) : v) for i = 1:n]
 end
 
 function getvar(n, step, steps, offset, default)
@@ -78,10 +78,10 @@ function stateout(steps, offset, default)
   group(outs...), defaults
 end
 
-function unrollgraph(v::IVertex, n)
+function unrollgraph(v::IVertex, n; seq = true)
   state, offset, default = collect_state(v)
   v = group(group(state...), v)
-  steps = create_steps(v, n)
+  steps = create_steps(v, n, seq = seq)
   for i = 1:n
     vars = inputs(steps[i][1])
     postwalk!(steps[i]) do v
@@ -94,7 +94,7 @@
   group(state,group(map(x->x[2], steps)...)), map(Flux.state, defaults)
 end
 
-unrollgraph(m, n) = unrollgraph(atomise(m), n)
+unrollgraph(m, n; seq = true) = unrollgraph(atomise(m), n; seq = seq)
 
 type Unrolled <: Model
   model
@@ -105,6 +105,6 @@ end
 
 graph(u::Unrolled) = u.graph
 
-unroll(model, n) = Unrolled(model, unrollgraph(model, n)..., n)
+unroll(model, n; seq = true) = Unrolled(model, unrollgraph(model, n; seq = seq)..., n)
 
 flip(model) = Capacitor(map(x -> isa(x, Offset) ? -x : x, atomise(model)))
diff --git a/src/layers/dense.jl b/src/layers/affine.jl
rename from src/layers/dense.jl
rename to src/layers/affine.jl
index 3f582ea2..81fef977 100644
--- a/src/layers/dense.jl
+++ b/src/layers/affine.jl
@@ -1,15 +1,15 @@
-export Dense
+export Affine
 
 # TODO: type hints for parameters
 
-@net type Dense
+@net type Affine
   W
   b
   x -> x*W + b
 end
 
-Dense(in::Integer, out::Integer; init = initn) =
-  Dense(init(in, out), init(1, out))
+Affine(in::Integer, out::Integer; init = initn) =
+  Affine(init(in, out), init(1, out))
 
 @net type Sigmoid
   layer::Model
@@ -17,4 +17,4 @@ Dense(in::Integer, out::Integer; init = initn) =
 end
 
 Sigmoid(in::Integer, out::Integer; init = randn) =
-  Sigmoid(Dense(in, out, init = init))
+  Sigmoid(Affine(in, out, init = init))
diff --git a/src/layers/shape.jl b/src/layers/shape.jl
index 07fc708a..41c1d97b 100644
--- a/src/layers/shape.jl
+++ b/src/layers/shape.jl
@@ -42,6 +42,6 @@ shape(i::Input, _) = i.dims
 
 # Implementation for bundled layers
 
-shape(d::Dense, _) = length(state(d.b)) # TODO: could perhaps infer this
+shape(d::Affine, _) = length(state(d.b)) # TODO: could perhaps infer this
 
-Dense(out::Integer) = Init(in::Integer -> Dense(in, out))
+Affine(out::Integer) = Init(in::Integer -> Affine(in, out))
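
For orientation, here is a minimal sketch of how the renamed layer and the new `seq` keyword might be used once this patch is applied. It targets the Flux API exactly as it appears in the files above (`Affine`, `Chain`, `Input`, `LSTM`, `relu`, `softmax`, `tf`, `unroll`); the `seq = false` call and the variable names are illustrative assumptions based on the keyword added in `src/compiler/loops.jl`, not part of the diff.

```julia
using Flux

# The layer formerly called `Dense`: parameters W and b, forward pass x -> x*W + b.
# `Affine(in, out)` builds the parameters; `Affine(out)` defers `in` to shape inference.
m = Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

model = tf(m)  # convert to the TensorFlow backend, as in examples/MNIST.jl

# Recurrent models are unrolled as before. The new `seq` keyword (default `true`)
# decides whether each unrolled step is spliced onto its own slice of a sequence
# input; with `seq = false` the steps keep the model's original input as-is.
rnn = Chain(
  Input(256),
  LSTM(256, 256),
  Affine(256, 256),
  softmax)

unrolled    = tf(unroll(rnn, 50))              # per-step sequence inputs (previous behaviour)
no_sequence = tf(unroll(rnn, 50, seq = false)) # assumed use of the new keyword
```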