From 06003b72c7b894e3588388dae895eab2aff0ef16 Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Tue, 22 Jan 2019 23:51:38 +0800 Subject: [PATCH 1/7] noise shape for dropout --- src/layers/normalise.jl | 4 ++-- test/layers/normalisation.jl | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 9201e991..9617b2c1 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -33,9 +33,9 @@ end _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) -function (a::Dropout)(x) +function (a::Dropout)(x, noise_shape=size(x)) a.active || return x - y = similar(x) + y = similar(x, noise_shape) rand!(y) y .= _dropout_kernel.(y, a.p, 1 - a.p) return x .* y diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index 18276140..d7e944b5 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -26,6 +26,15 @@ using Flux.Tracker: data testmode!(m) y = m(x) @test count(a->a == 0, y) == 0 + + x = rand(100, 50) + m = Dropout(0.5) + y = m(x, (100, 1)) + c = map(i->count(a->a==0, @view y[:, i]), 1:50) + @test minimum(c) == maximum(c) + y = m(x, (1, 50)) + c = map(i->count(a->a==0, @view y[i, :]), 1:100) + @test minimum(c) == maximum(c) end @testset "BatchNorm" begin From 934f0840b2bd20dd43bb563d62ebd3199a87b592 Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Thu, 14 Mar 2019 21:51:28 +0800 Subject: [PATCH 2/7] change API to dims --- src/layers/normalise.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 9617b2c1..4af6a196 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -31,11 +31,13 @@ function Dropout(p) Dropout{typeof(p)}(p, true) end +_dropout_shape(s, dims...) = tuple((i ∈ dims ? 1 : si for (i, si) ∈ enumerate(s))...) + _dropout_kernel(y::T, p, q) where {T} = y > p ? 
T(1 / q) : T(0) -function (a::Dropout)(x, noise_shape=size(x)) +function (a::Dropout)(x, dims=0) a.active || return x - y = similar(x, noise_shape) + y = similar(x, _dropout_shape(size(x), dims...)) rand!(y) y .= _dropout_kernel.(y, a.p, 1 - a.p) return x .* y From 59da68b4d93aefe835d95b4e3d34319f51096531 Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Thu, 14 Mar 2019 21:55:37 +0800 Subject: [PATCH 3/7] update test --- test/layers/normalisation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index d7e944b5..1aee1453 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -29,10 +29,10 @@ using Flux.Tracker: data x = rand(100, 50) m = Dropout(0.5) - y = m(x, (100, 1)) + y = m(x, 2) c = map(i->count(a->a==0, @view y[:, i]), 1:50) @test minimum(c) == maximum(c) - y = m(x, (1, 50)) + y = m(x, 1) c = map(i->count(a->a==0, @view y[i, :]), 1:100) @test minimum(c) == maximum(c) end From 261235311cbef5089d69e24d73de6bf0faab525f Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Fri, 5 Apr 2019 01:19:20 +0800 Subject: [PATCH 4/7] change `dims` as unbroadcasted dims and keyword argument --- src/layers/normalise.jl | 7 ++++--- test/layers/normalisation.jl | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index 4af6a196..d7905b2d 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -31,13 +31,14 @@ function Dropout(p) Dropout{typeof(p)}(p, true) end -_dropout_shape(s, dims...) = tuple((i ∈ dims ? 1 : si for (i, si) ∈ enumerate(s))...) +_dropout_shape(s, ::Colon) = size(s) +_dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(size(s)))...) _dropout_kernel(y::T, p, q) where {T} = y > p ? 
T(1 / q) : T(0) -function (a::Dropout)(x, dims=0) +function (a::Dropout)(x; dims = :) a.active || return x - y = similar(x, _dropout_shape(size(x), dims...)) + y = similar(x, _dropout_shape(x, dims)) rand!(y) y .= _dropout_kernel.(y, a.p, 1 - a.p) return x .* y diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index 1aee1453..887dbac8 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -29,12 +29,12 @@ using Flux.Tracker: data x = rand(100, 50) m = Dropout(0.5) - y = m(x, 2) - c = map(i->count(a->a==0, @view y[:, i]), 1:50) - @test minimum(c) == maximum(c) - y = m(x, 1) + y = m(x; dims=2) c = map(i->count(a->a==0, @view y[i, :]), 1:100) @test minimum(c) == maximum(c) + y = m(x; dims=1) + c = map(i->count(a->a==0, @view y[:, i]), 1:50) + @test minimum(c) == maximum(c) end @testset "BatchNorm" begin From 5c5140683c47b74ae295d87b071bbc3dcc489a28 Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Fri, 10 May 2019 23:45:50 +0800 Subject: [PATCH 5/7] make dims as field of Dropout --- src/layers/normalise.jl | 31 +++++++++++++++++++++++-------- test/layers/normalisation.jl | 7 ++++--- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index d7905b2d..b70323f1 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -13,22 +13,24 @@ end _testmode!(m, test) = nothing """ - Dropout(p) + Dropout(p; dims = :) A Dropout layer. For each input, either sets that input to `0` (with probability -`p`) or scales it by `1/(1-p)`. This is used as a regularisation, i.e. it -reduces overfitting during training. +`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcasted + dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is + used as a regularisation, i.e. it reduces overfitting during training. see also [`dropout`](@ref). Does nothing to the input once in [`testmode!`](@ref). 
""" mutable struct Dropout{F} p::F + dims::Union{Colon, Int, NTuple{N, Int} where N} active::Bool end -function Dropout(p) +function Dropout(p; dims = :) @assert 0 ≤ p ≤ 1 - Dropout{typeof(p)}(p, true) + Dropout{typeof(p)}(p, dims, true) end _dropout_shape(s, ::Colon) = size(s) @@ -36,14 +38,27 @@ _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(s _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) -function (a::Dropout)(x; dims = :) - a.active || return x + +""" + dropout(x, p; dims = :) + +The dropout function. For each input, either sets that input to `0` (with probability +`p`) or scales it by `1/(1-p)`. The `dims` argument specifies the unbroadcasted + dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is + used as a regularisation, i.e. it reduces overfitting during training. +""" +function dropout(x, p; dims = :) y = similar(x, _dropout_shape(x, dims)) rand!(y) - y .= _dropout_kernel.(y, a.p, 1 - a.p) + y .= _dropout_kernel.(y, p, 1 - p) return x .* y end +function (a::Dropout)(x) + a.active || return x + return dropout(x, a.p; dims = a.dims) +end + _testmode!(a::Dropout, test) = (a.active = !test) """ diff --git a/test/layers/normalisation.jl b/test/layers/normalisation.jl index 887dbac8..6ec44467 100644 --- a/test/layers/normalisation.jl +++ b/test/layers/normalisation.jl @@ -28,11 +28,12 @@ using Flux.Tracker: data @test count(a->a == 0, y) == 0 x = rand(100, 50) - m = Dropout(0.5) - y = m(x; dims=2) + m = Dropout(0.5, dims = 2) + y = m(x) c = map(i->count(a->a==0, @view y[i, :]), 1:100) @test minimum(c) == maximum(c) - y = m(x; dims=1) + m = Dropout(0.5, dims = 1) + y = m(x) c = map(i->count(a->a==0, @view y[:, i]), 1:50) @test minimum(c) == maximum(c) end From bdf74fe342c5d36af52067205656207170ef66e6 Mon Sep 17 00:00:00 2001 From: chengchingwen Date: Tue, 14 May 2019 00:57:42 +0800 Subject: [PATCH 6/7] update NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git 
a/NEWS.md b/NEWS.md index 540ef726..9787efd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # v0.8.0 +* [Dropoutnow has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563) * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311). * New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647) * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption. From 9c1bb93aa3aab07cc50c19d7cfd2038097282ae1 Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 14 May 2019 01:12:59 +0800 Subject: [PATCH 7/7] Update NEWS.md Co-Authored-By: Mike J Innes --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9787efd8..8f2580b0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # v0.8.0 -* [Dropoutnow has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563) +* [Dropout now has a `dims` argument for specifying the unbroadcast dimensions.](https://github.com/FluxML/Flux.jl/pull/563) * New [ConvTranspose layer](https://github.com/FluxML/Flux.jl/pull/311). * New [Maxout layer](https://github.com/FluxML/Flux.jl/pull/647) * Datasets are now [hash verified on download](https://github.com/FluxML/Flux.jl/pull/585) to avoid corruption.