diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 00000000..bf16f8ed
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,10 @@
+variables:
+  CI_IMAGE_TAG: 'cuda'
+
+include:
+  - 'https://raw.githubusercontent.com/JuliaGPU/gitlab-ci/master/templates/v1/common.yml'
+  - 'https://raw.githubusercontent.com/JuliaGPU/gitlab-ci/master/templates/v1/test_v1.0.yml'
+  - 'https://raw.githubusercontent.com/JuliaGPU/gitlab-ci/master/templates/v1/test_dev.yml'
+
+test:dev:
+  allow_failure: true
diff --git a/NEWS.md b/NEWS.md
index 4cf755e7..681e701f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # v0.8.0
 
 * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311).
+* New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647).
 * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption.
 * We now [zero the initial state for RNNs](https://github.com/FluxML/Flux.jl/pull/590/).
 * [Normalisation can now work on arbitrary `dims`.](https://github.com/FluxML/Flux.jl/pull/592)
diff --git a/docs/src/training/training.md b/docs/src/training/training.md
index ae483783..679bbd0b 100644
--- a/docs/src/training/training.md
+++ b/docs/src/training/training.md
@@ -93,3 +93,11 @@ evalcb() = @show(loss(test_x, test_y))
 Flux.train!(objective, ps, data, opt,
             cb = throttle(evalcb, 5))
 ```
+
+Calling `Flux.stop()` in a callback will exit the training loop early.
+
+```julia
+cb = function ()
+  accuracy() > 0.9 && Flux.stop()
+end
+```
diff --git a/src/layers/basic.jl b/src/layers/basic.jl
index b39a0de2..005915bb 100644
--- a/src/layers/basic.jl
+++ b/src/layers/basic.jl
@@ -167,6 +167,8 @@ function Maxout(f, n_alts)
   return Maxout(over)
 end
 
+@treelike Maxout
+
 function (mo::Maxout)(input::AbstractArray)
   mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over)
 end
diff --git a/test/layers/basic.jl b/test/layers/basic.jl
index 3a3b1695..3c5229f4 100644
--- a/test/layers/basic.jl
+++ b/test/layers/basic.jl
@@ -53,5 +53,11 @@ using Test, Random
       target = [0.5, 0.7].*input
       @test mo(input) == target
     end
+
+    @testset "params" begin
+      mo = Maxout(()->Dense(32, 64), 4)
+      ps = params(mo)
+      @test length(ps) == 8  # 4 alts, each with weight and bias
+    end
   end
 end
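
As a usage sketch (not part of the diff above): registering `Maxout` with `@treelike` is what lets `Flux.params` recurse into each wrapped alternative, which the new test exercises. The layer sizes below are illustrative, mirroring the test.

```julia
using Flux

# Maxout over 4 Dense alternatives; the forward pass takes the
# element-wise max of the alternatives' outputs.
mo = Maxout(() -> Dense(32, 64), 4)

# Thanks to @treelike, params collects every alternative's parameters:
# 4 Dense layers × (weight, bias) = 8 parameter arrays.
ps = Flux.params(mo)
@assert length(ps) == 8

y = mo(rand(32))  # 64-element output, max taken across the 4 alternatives
```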