From e08fd7a6d2323153a304d211f9b97c6c9c78074f Mon Sep 17 00:00:00 2001 From: Matthew Kelley Date: Tue, 26 Jun 2018 11:43:16 -0600 Subject: [PATCH 1/4] Added epsilon term to binarycrossentropy --- src/layers/stateless.jl | 6 +++--- test/layers/stateless.jl | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index ccd4fe4c..6fe28d30 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -15,9 +15,9 @@ function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight end """ - binarycrossentropy(ŷ, y) + binarycrossentropy(ŷ, y; ϵ) -Return `-y*log(ŷ) - (1-y)*log(1-ŷ)`. +Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerical stability. julia> binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0.]) 3-element Array{Float64,1}: @@ -25,7 +25,7 @@ Return `-y*log(ŷ) - (1-y)*log(1-ŷ)`. 0.352317 0.86167 """ -binarycrossentropy(ŷ, y) = -y*log(ŷ) - (1 - y)*log(1 - ŷ) +binarycrossentropy(ŷ, y; ϵ=1e-7) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) """ logitbinarycrossentropy(logŷ, y) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index ecfa7014..7c641261 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -1,7 +1,9 @@ using Base.Test -using Flux: onehotbatch, mse, crossentropy, logitcrossentropy, +using Flux: onehotbatch, mse, crossentropy, logitcrossentropy, σ, binarycrossentropy, logitbinarycrossentropy +const ϵ = 1e-7 + @testset "losses" begin # First, regression-style y's y = [1, 1, 0, 0] @@ -40,10 +42,11 @@ using Flux: onehotbatch, mse, crossentropy, logitcrossentropy, logŷ, y = randn(3), rand(3) @testset "binarycrossentropy" begin - @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) + @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) + @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) + 1e-7) - (1 - y).*log.(1 - σ.(logŷ) + 1e-7) end - + @testset "logitbinarycrossentropy" begin - @test logitbinarycrossentropy.(logŷ, y) ≈ binarycrossentropy.(σ.(logŷ), y) + @test logitbinarycrossentropy.(logŷ, y) ≈ binarycrossentropy.(σ.(logŷ), y; ϵ=0) end end From ed032cdb1ed565509dd60b580ed0b68b0a2b422f Mon Sep 17 00:00:00 2001 From: Matthew Kelley Date: Tue, 26 Jun 2018 12:29:06 -0600 Subject: [PATCH 2/4] =?UTF-8?q?Change=20epsilon=20value=20to=20eps(y=CC=82?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/layers/stateless.jl | 4 ++-- test/layers/stateless.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 6fe28d30..ba80e8a6 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -15,7 +15,7 @@ function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight end """ - binarycrossentropy(ŷ, y; ϵ) + binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerical stability. @@ -25,7 +25,7 @@ Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerica 0.352317 0.86167 """ -binarycrossentropy(ŷ, y; ϵ=1e-7) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) +binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) """ logitbinarycrossentropy(logŷ, y) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 7c641261..91d530ca 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -43,7 +43,7 @@ const ϵ = 1e-7 logŷ, y = randn(3), rand(3) @testset "binarycrossentropy" begin @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) - @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) + 1e-7) - (1 - y).*log.(1 - σ.(logŷ) + 1e-7) + @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(ŷ)) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(ŷ)) end @testset "logitbinarycrossentropy" begin From 0e95be33269f0582aafd0de716b60793cdb8de35 Mon Sep 17 00:00:00 2001 From: Matthew Kelley Date: Tue, 26 Jun 2018 14:48:51 -0600 Subject: [PATCH 3/4] =?UTF-8?q?Call=20Flux.Tracker.data()=20on=20y=CC=82?= =?UTF-8?q?=20for=20bce?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/layers/stateless.jl | 3 ++- test/layers/stateless.jl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index ba80e8a6..7d3eb8d0 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -25,7 +25,8 @@ Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerica 0.352317 0.86167 """ -binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) +# binarycrossentropy(ŷ, y; ϵ=eps(Flux.Tracker.data(ŷ))) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) +binarycrossentropy(ŷ, y; ϵ=eps(Flux.Tracker.data(ŷ))) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) """ logitbinarycrossentropy(logŷ, y) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 91d530ca..31a67aa7 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -43,7 +43,7 @@ const ϵ = 1e-7 logŷ, y = randn(3), rand(3) @testset "binarycrossentropy" begin @test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 - y).*log.(1 - σ.(logŷ)) - @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(ŷ)) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(ŷ)) + @test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 - y).*log.(1 - σ.(logŷ) .+ eps.(σ.(logŷ))) end @testset "logitbinarycrossentropy" begin From 864d72eef5e0488274e4fd5e8acd4f253d112112 Mon Sep 17 00:00:00 2001 From: Matthew Kelley Date: Tue, 26 Jun 2018 23:55:43 -0600 Subject: [PATCH 4/4] Overload Base.eps() for TrackedReal --- src/layers/stateless.jl | 3 +-- src/tracker/scalar.jl | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 7d3eb8d0..ba80e8a6 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -25,8 +25,7 @@ Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerica 0.352317 0.86167 """ -# binarycrossentropy(ŷ, y; ϵ=eps(Flux.Tracker.data(ŷ))) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) -binarycrossentropy(ŷ, y; ϵ=eps(Flux.Tracker.data(ŷ))) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) +binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) """ logitbinarycrossentropy(logŷ, y) diff --git a/src/tracker/scalar.jl b/src/tracker/scalar.jl index f94f1647..773943c0 100644 --- a/src/tracker/scalar.jl +++ b/src/tracker/scalar.jl @@ -31,6 +31,8 @@ Base.convert(::Type{TrackedReal{T}}, x::TrackedReal{S}) where {T,S} = Base.:(<)(x::TrackedReal, y::TrackedReal) = data(x) < data(y) Base.:(==)(x::TrackedReal, y::TrackedReal) = data(x) == data(y) +Base.eps(x::TrackedReal) = eps(data(x)) + for f in :[isinf, isnan, isfinite].args @eval Base.$f(x::TrackedReal) = Base.$f(data(x)) end