commit
4c1b1eb18c
@@ -30,3 +30,11 @@ leakyrelu
 elu
 swish
 ```
+
+## Normalisation & Regularisation
+
+These layers don't affect the structure of the network but may improve training times or reduce overfitting.
+
+```@docs
+Dropout
+```
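To make the new docs concrete, here is a minimal usage sketch (the layer sizes and input here are hypothetical, not part of the patch): `Dropout` slots between layers as a pure regulariser and leaves the network's shape unchanged.

```julia
using Flux

# Dropout between two Dense layers: the 10-in/2-out structure is unchanged,
# the layer only zeroes activations while in training mode.
m = Chain(
  Dense(10, 5),
  Dropout(0.5),   # each hidden activation is dropped with probability 0.5
  Dense(5, 2))

x = rand(10)
m(x)  # in training mode, roughly half the hidden activations are zeroed
```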
@@ -7,7 +7,7 @@ module Flux
 using Juno, Requires
 using Lazy: @forward

-export Chain, Dense, RNN, LSTM,
+export Chain, Dense, RNN, LSTM, Dropout,
   SGD, ADAM, Momentum, Nesterov,
   param, params, mapleaves
@@ -27,5 +27,6 @@ include("tree.jl")
 include("layers/stateless.jl")
 include("layers/basic.jl")
 include("layers/recurrent.jl")
+include("layers/normalisation.jl")

 end # module
@@ -27,7 +27,7 @@ end
 children(c::Chain) = c.layers
 mapchildren(f, c::Chain) = Chain(f.(c.layers)...)

-(s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers)
+(c::Chain)(x) = foldl((x, m) -> m(x), x, c.layers)

 Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...)
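As a side note on the definition being renamed here: the `foldl` threads the input through each layer in turn, so calling a `Chain` is plain left-to-right composition. A quick sketch of the equivalence (not part of the patch; it assumes `Chain` accepts any callables, which the definition above doesn't restrict):

```julia
using Flux

c = Chain(x -> x .+ 1, x -> 2 .* x)
c([1, 2])           # [4, 6]: the input is folded through each layer in turn
2 .* ([1, 2] .+ 1)  # the same computation written out by hand
```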
@@ -0,0 +1,45 @@
+"""
+    testmode!(m)
+    testmode!(m, false)
+
+Put layers like [`Dropout`](@ref) and `BatchNorm` into testing mode (or back to
+training mode with `false`).
+"""
+function testmode!(m, val::Bool=true)
+  prefor(x -> _testmode!(x, val), m)
+  return m
+end
+
+_testmode!(m, test) = nothing
+
+"""
+    Dropout(p)
+
+A Dropout layer. For each input, either sets that input to `0` (with probability
+`p`) or scales it by `1/(1-p)`. This is used as a regularisation, i.e. it
+reduces overfitting during training.
+
+Does nothing to the input once in [`testmode!`](@ref).
+"""
+mutable struct Dropout{F}
+  p::F
+  active::Bool
+end
+
+function Dropout(p)
+  @assert 0 ≤ p ≤ 1
+  Dropout{typeof(p)}(p, true)
+end
+
+function (a::Dropout)(x)
+  a.active || return x
+  y = similar(x)
+  rand!(y)
+  q = 1 - a.p
+  @inbounds for i = 1:length(y)
+    y[i] = y[i] > a.p ? 1 / q : 0
+  end
+  return y .* x
+end
+
+_testmode!(a::Dropout, test) = (a.active = !test)
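To spell out the behaviour the docstrings describe, a usage sketch mirroring the tests further down (assuming the API exactly as in this patch; note `testmode!` is not exported, so it is imported explicitly, as the tests do):

```julia
using Flux
using Flux: testmode!

d = Dropout(0.3)
x = ones(10)
d(x)            # active: each entry is either 0 or 1/(1 - 0.3) ≈ 1.43

testmode!(d)    # switch the layer to test mode
d(x) == x       # true: Dropout is now the identity

testmode!(d, false)  # and back to training mode
```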
@@ -56,6 +56,18 @@ Base.similar(x::TrackedArray, dims::Union{AbstractUnitRange,Integer}...) =

 Base.similar(x::TrackedArray, T::Type) = similar(data(x), T)

+value(x) = x
+value(x::TrackedArray) = data(x)
+value(x::TrackedScalar) = data(x)[]
+
+Base.:(==)(x::TrackedArray, y) = value(x) == y
+Base.:(==)(y, x::TrackedArray) = y == value(x)
+Base.:(==)(x::TrackedArray, y::TrackedArray) = value(x) == value(y)
+
+Base.isless(x::TrackedScalar, y) = isless(value(x), y)
+Base.isless(x, y::TrackedScalar) = isless(x, value(y))
+Base.isless(x::TrackedScalar, y::TrackedScalar) = isless(value(x), value(y))
+
 Base.show(io::IO, ::Type{TrackedArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} =
   print(io, "TrackedArray{…,$A}")
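For context, what the new `value`/`==` definitions buy: tracked arrays can now be compared directly against plain data, which is what lets the Dropout tests below write `x == testmode!(Dropout(0.1))(x)`. A sketch under the assumption that `param` wraps an `Array` in a `TrackedArray`, as the export list suggests:

```julia
using Flux: param

W = param([1 2; 3 4])  # a TrackedArray wrapping a plain Array
W == [1 2; 3 4]        # true: `==` now compares the wrapped data, not the wrapper
```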
@@ -0,0 +1,28 @@
+using Flux: testmode!
+
+@testset "Dropout" begin
+  x = [1.,2.,3.]
+  @test x == testmode!(Dropout(0.1))(x)
+  @test x == Dropout(0)(x)
+  @test zeros(x) == Dropout(1)(x)
+
+  x = rand(100)
+  m = Dropout(0.9)
+  y = m(x)
+  @test count(a->a==0, y) > 50
+  testmode!(m)
+  y = m(x)
+  @test count(a->a==0, y) == 0
+  testmode!(m, false)
+  y = m(x)
+  @test count(a->a==0, y) > 50
+
+  x = rand(100)
+  m = Chain(Dense(100,100),
+            Dropout(0.9))
+  y = m(x)
+  @test count(a->a == 0, y) > 50
+  testmode!(m)
+  y = m(x)
+  @test count(a->a == 0, y) == 0
+end
@@ -4,5 +4,6 @@ using Flux, Base.Test

 include("utils.jl")
 include("tracker.jl")
+include("layers/normalisation.jl")

 end