From 633f0df01fc0e200e4a03cb7f3e93f868a7d1b72 Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Tue, 12 Mar 2019 02:31:42 +0530 Subject: [PATCH 01/15] Added new loss functions. --- src/layers/stateless.jl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 23fd1651..3bb48f1f 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -54,3 +54,31 @@ function normalise(x::AbstractArray, dims) Base.depwarn("`normalise(x::AbstractArray, dims)` is deprecated, use `normalise(a, dims=dims)` instead.", :normalise) normalise(x, dims = dims) end + +""" + Kullback Leibler Divergence(KL Divergence) +KLDivergence is a measure of how much one probability distribution is different from the other. +It is always non-negative and zero only when both the distributions are equal everywhere. + +""" +function KLDivergence(ŷ, y) + entropy = sum(y .* log.(y)) *1 //size(y,2) + cross_entropy = crossentropy(ŷ, y) + return entropy + cross_entropy +end + +""" + Poisson Loss function +Poisson loss function is a measure of how the predicted distribution diverges from the expected distribution. + +""" +Poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) + +""" + Logcosh Loss function +""" + +logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) + +Hinge(ŷ, y) = sum(max.(0.0, 1 .- ŷ .* y)) *1 // size(y,2) + From 61386c04f8ac8a6badcf8ca889169eb623b5327b Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Tue, 12 Mar 2019 02:36:37 +0530 Subject: [PATCH 02/15] Tests added. --- test/layers/stateless.jl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 34abb8cb..336adc12 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -49,7 +49,31 @@ const ϵ = 1e-7 @testset "logitbinarycrossentropy" begin @test logitbinarycrossentropy.(logŷ, y) ≈ binarycrossentropy.(σ.(logŷ), y; ϵ=0) end + + y = [1 2 3] + y1 = [4.0 5.0 6.0] + @testset "KLDivergence" begin + @test Flux.KLDivergence(y, y1) ≈ 4.761838062403337 + @test Flux.KLDivergence(y, y) ≈ 0 + end + @testset "Hinge" begin + @test Flux.Hinge(y, y1) ≈ 0 + @test Flux.Hinge(y, 0.2 .* y) ≈ 0.33333 + end + + y = [0.1 0.2 0.3] + y1 = [0.4 0.5 0.6] + @testset "Poisson" begin + @test Flux.Poisson(y, y1) ≈ 1.0160455586700767 + @test Flux.Poisson(y, y) ≈ 0.5044459776946685 + end + + @testset "logcosh" begin + @test Flux.logcosh(y, y1) ≈ 0.13302230977782092 + @test Flux.logcosh(y, y) ≈ 0 + end + @testset "no spurious promotions" begin for T in (Float16, Float32, Float64) y = rand(T, 2) From 57a52e33750c9f8afcf7a8937abbbee766419121 Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Tue, 12 Mar 2019 02:58:32 +0530 Subject: [PATCH 03/15] Error of recurrent decimals fixed. 
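The values asserted in the tests can be reproduced outside of Flux with the same formulas the first patch introduces. The sketch below uses standalone re-implementations for illustration only (they mirror the added definitions but are not the library source; `crossentropy` here is a simplified stand-in computing `-sum(y .* log.(ŷ))` averaged over the second dimension):

```julia
# Illustrative, standalone re-implementations of the losses added in PATCH 01.
crossentropy(ŷ, y) = -sum(y .* log.(ŷ)) * 1 // size(y, 2)
kldivergence(ŷ, y) = sum(y .* log.(y)) * 1 // size(y, 2) + crossentropy(ŷ, y)
poisson(ŷ, y)      = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y, 2)
hinge(ŷ, y)        = sum(max.(0, 1 .- ŷ .* y)) * 1 // size(y, 2)

ŷ, y = [1 2 3], [4.0 5.0 6.0]
kldivergence(ŷ, y)   # ≈ 4.761838062403337, the value asserted in the tests
kldivergence(y, y)   # ≈ 0: the divergence vanishes when the two distributions coincide

ŷ = [1 2 3 4]
hinge(ŷ, 0.5 .* ŷ)   # sum of margins is 0.5, averaged over 4 columns: 0.125
```

Switching the hinge test to a four-element target makes the averaged loss an exact binary fraction (0.125) instead of the recurring decimal 1/3, which `≈` cannot match against the truncated literal `0.33333`.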
--- test/layers/stateless.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 336adc12..f961ed2f 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -56,10 +56,12 @@ const ϵ = 1e-7 @test Flux.KLDivergence(y, y1) ≈ 4.761838062403337 @test Flux.KLDivergence(y, y) ≈ 0 end - + + y = [1 2 3 4] + y1 = [5.0 6.0 7.0 8.0] @testset "Hinge" begin @test Flux.Hinge(y, y1) ≈ 0 - @test Flux.Hinge(y, 0.2 .* y) ≈ 0.33333 + @test Flux.Hinge(y, 0.5 .* y) ≈ 0.125 end y = [0.1 0.2 0.3] From c4d12e57fe6a3ea0473e5fa6145d1d55789c9358 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Tue, 26 Mar 2019 03:09:48 +0530 Subject: [PATCH 04/15] Loss function names in lowercase --- src/layers/stateless.jl | 9 +++------ test/layers/stateless.jl | 18 +++++++++--------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 3bb48f1f..424db1df 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -59,9 +59,8 @@ end Kullback Leibler Divergence(KL Divergence) KLDivergence is a measure of how much one probability distribution is different from the other. It is always non-negative and zero only when both the distributions are equal everywhere. - """ -function KLDivergence(ŷ, y) +function kldivergence(ŷ, y) entropy = sum(y .* log.(y)) *1 //size(y,2) cross_entropy = crossentropy(ŷ, y) return entropy + cross_entropy @@ -70,15 +69,13 @@ end """ Poisson Loss function Poisson loss function is a measure of how the predicted distribution diverges from the expected distribution. - """ -Poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) +poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) """ Logcosh Loss function """ - logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) -Hinge(ŷ, y) = sum(max.(0.0, 1 .- ŷ .* y)) *1 // size(y,2) +hinge(ŷ, y) = sum(max.(0.0, 1 .- ŷ .* y)) *1 // size(y,2) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index f961ed2f..97bfea10 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -52,23 +52,23 @@ const ϵ = 1e-7 y = [1 2 3] y1 = [4.0 5.0 6.0] - @testset "KLDivergence" begin - @test Flux.KLDivergence(y, y1) ≈ 4.761838062403337 - @test Flux.KLDivergence(y, y) ≈ 0 + @testset "kldivergence" begin + @test Flux.kldivergence(y, y1) ≈ 4.761838062403337 + @test Flux.kldivergence(y, y) ≈ 0 end y = [1 2 3 4] y1 = [5.0 6.0 7.0 8.0] - @testset "Hinge" begin - @test Flux.Hinge(y, y1) ≈ 0 - @test Flux.Hinge(y, 0.5 .* y) ≈ 0.125 + @testset "hinge" begin + @test Flux.hinge(y, y1) ≈ 0 + @test Flux.hinge(y, 0.5 .* y) ≈ 0.125 end y = [0.1 0.2 0.3] y1 = [0.4 0.5 0.6] - @testset "Poisson" begin - @test Flux.Poisson(y, y1) ≈ 1.0160455586700767 - @test Flux.Poisson(y, y) ≈ 0.5044459776946685 + @testset "poisson" begin + @test Flux.poisson(y, y1) ≈ 1.0160455586700767 + @test Flux.poisson(y, y) ≈ 0.5044459776946685 end @testset "logcosh" begin From 6f078857beda49e7f1d565cc7e4dded6c55db3d0 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Tue, 26 Mar 2019 03:15:28 +0530 Subject: [PATCH 05/15] Added reference links to loss functions --- src/layers/stateless.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 424db1df..aaefcee9 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -59,6 +59,7 @@ end Kullback Leibler Divergence(KL Divergence) KLDivergence is a measure of how much one probability distribution is different from the other. 
It is always non-negative and zero only when both the distributions are equal everywhere. +https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence """ function kldivergence(ŷ, y) entropy = sum(y .* log.(y)) *1 //size(y,2) @@ -69,6 +70,7 @@ end """ Poisson Loss function Poisson loss function is a measure of how the predicted distribution diverges from the expected distribution. +https://isaacchanghau.github.io/post/loss_functions/ """ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) From 930adb122dc5443f205ced401b5275ddbeeb67ca Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Mon, 25 Mar 2019 23:43:06 +0530 Subject: [PATCH 06/15] Avoided promotion to Float64 in hinge. --- src/layers/stateless.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index aaefcee9..3221ddff 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -79,5 +79,5 @@ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) """ logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) -hinge(ŷ, y) = sum(max.(0.0, 1 .- ŷ .* y)) *1 // size(y,2) +hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) From 4efcc69ba5de4f68f5e0e0dc474b44ddf9388615 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Tue, 26 Mar 2019 23:23:02 +0530 Subject: [PATCH 07/15] logcosh averaged --- src/layers/stateless.jl | 2 +- test/layers/stateless.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 3221ddff..6b6abb5e 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -77,7 +77,7 @@ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) """ Logcosh Loss function """ -logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) +logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) *1 // size(y,2) hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 97bfea10..e8d881fb 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -72,7 +72,7 @@ const ϵ = 1e-7 end @testset "logcosh" begin - @test Flux.logcosh(y, y1) ≈ 0.13302230977782092 + @test Flux.logcosh(y, y1) ≈ 0.044340769925940306 @test Flux.logcosh(y, y) ≈ 0 end From b84ab7ac95aa1eca3ec302bc7f997518b3e71612 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Fri, 5 Apr 2019 03:16:54 +0530 Subject: [PATCH 08/15] Removed logcosh --- src/layers/stateless.jl | 5 ----- test/layers/stateless.jl | 5 ----- 2 files changed, 10 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 6b6abb5e..3444f0f4 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -74,10 +74,5 @@ https://isaacchanghau.github.io/post/loss_functions/ """ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) -""" - Logcosh Loss function -""" -logcosh(ŷ, y) = sum(log.(cosh.(ŷ .- y))) *1 // size(y,2) - hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index e8d881fb..d912a5fe 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -70,11 +70,6 @@ const ϵ = 1e-7 @test Flux.poisson(y, y1) ≈ 1.0160455586700767 @test Flux.poisson(y, y) ≈ 0.5044459776946685 end - - @testset "logcosh" begin - @test Flux.logcosh(y, y1) ≈ 0.044340769925940306 - @test Flux.logcosh(y, y) ≈ 0 - end @testset "no spurious promotions" begin for T in (Float16, Float32, Float64) From 710084ffbfca78805d8c0fe41be8e9dbb58b3c4f Mon Sep 17 00:00:00 2001 From: thebhatman Date: Fri, 5 Apr 2019 23:50:16 +0530 Subject: [PATCH 09/15] 
Loss functions added to docs --- docs/src/training/training.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/src/training/training.md b/docs/src/training/training.md index ae483783..76c099eb 100644 --- a/docs/src/training/training.md +++ b/docs/src/training/training.md @@ -32,6 +32,18 @@ Flux.train!(loss, ps, data, opt) The objective will almost always be defined in terms of some *cost function* that measures the distance of the prediction `m(x)` from the target `y`. Flux has several of these built in, like `mse` for mean squared error or `crossentropy` for cross entropy loss, but you can calculate it however you want. +In-built loss functions: +```@docs +mse +crossentropy +logitcrossentropy +binarycrossentropy +logitbinarycrossentropy +kldivergence +poisson +hinge +``` + ## Datasets The `data` argument provides a collection of data to train with (usually a set of inputs `x` and target outputs `y`). For example, here's a dummy data set with only one data point: From ec35e9cbaa31bcdb37857c5bb39bbbfc22379e4e Mon Sep 17 00:00:00 2001 From: thebhatman Date: Mon, 30 Sep 2019 21:02:13 +0530 Subject: [PATCH 10/15] Loss functions docs added in layers.md --- docs/src/models/layers.md | 12 ++++++++++++ docs/src/training/training.md | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index f2bd8046..c439581c 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -66,3 +66,15 @@ AlphaDropout LayerNorm GroupNorm ``` + +## In-built loss functions: +```@docs +mse +crossentropy +logitcrossentropy +binarycrossentropy +logitbinarycrossentropy +kldivergence +poisson +hinge +``` \ No newline at end of file diff --git a/docs/src/training/training.md b/docs/src/training/training.md index cba1422c..679bbd0b 100644 --- a/docs/src/training/training.md +++ b/docs/src/training/training.md @@ -32,18 +32,6 @@ Flux.train!(loss, ps, data, opt) The objective will almost always be defined in terms of some *cost function* that measures the distance of the prediction `m(x)` from the target `y`. Flux has several of these built in, like `mse` for mean squared error or `crossentropy` for cross entropy loss, but you can calculate it however you want. -In-built loss functions: -```@docs -mse -crossentropy -logitcrossentropy -binarycrossentropy -logitbinarycrossentropy -kldivergence -poisson -hinge -``` - ## Datasets The `data` argument provides a collection of data to train with (usually a set of inputs `x` and target outputs `y`). For example, here's a dummy data set with only one data point: From ec886c8ce864721b4144cb749c458b3410c67946 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Thu, 3 Oct 2019 21:13:09 +0530 Subject: [PATCH 11/15] Added docstring for hinge loss --- src/layers/stateless.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index c3dd22b0..8cdac33d 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -50,11 +50,6 @@ function normalise(x::AbstractArray; dims=1) return (x .- μ′) ./ σ′ end -function normalise(x::AbstractArray, dims) - Base.depwarn("`normalise(x::AbstractArray, dims)` is deprecated, use `normalise(a, dims=dims)` instead.", :normalise) - normalise(x, dims = dims) -end - """ Kullback Leibler Divergence(KL Divergence) KLDivergence is a measure of how much one probability distribution is different from the other. 
@@ -74,4 +69,8 @@ https://isaacchanghau.github.io/post/loss_functions/ """ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) +""" + Hinge Loss function +Measures the loss given the prediction ŷ and true labels y(containing 1 or -1). This is usually used for measuring whether two inputs are similar or dissimilar +""" hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) From 96a23c295c88454770dd5d5a961fec4d1898dcb0 Mon Sep 17 00:00:00 2001 From: thebhatman Date: Wed, 9 Oct 2019 14:53:03 +0530 Subject: [PATCH 12/15] Changes to docs --- docs/src/models/layers.md | 2 +- src/layers/stateless.jl | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index c75c77b7..0007853a 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -66,7 +66,7 @@ LayerNorm GroupNorm ``` -## In-built loss functions: +## Loss functions: ```@docs mse crossentropy diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 8cdac33d..4e142f07 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -51,10 +51,10 @@ function normalise(x::AbstractArray; dims=1) end """ - Kullback Leibler Divergence(KL Divergence) + kldivergence(ŷ, y) KLDivergence is a measure of how much one probability distribution is different from the other. It is always non-negative and zero only when both the distributions are equal everywhere. -https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence +[KL Divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence). """ function kldivergence(ŷ, y) entropy = sum(y .* log.(y)) *1 //size(y,2) @@ -63,14 +63,15 @@ function kldivergence(ŷ, y) end """ - Poisson Loss function + poisson(ŷ, y) Poisson loss function is a measure of how the predicted distribution diverges from the expected distribution. -https://isaacchanghau.github.io/post/loss_functions/ +[Poisson Loss](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson). """ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) """ - Hinge Loss function -Measures the loss given the prediction ŷ and true labels y(containing 1 or -1). This is usually used for measuring whether two inputs are similar or dissimilar + hinge(ŷ, y) +Measures the loss given the prediction ŷ and true labels y(containing 1 or -1). +[Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss). 
""" hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) From d591b2b59eba2ec360a0836184632d9da8f8dc8f Mon Sep 17 00:00:00 2001 From: thebhatman Date: Wed, 9 Oct 2019 21:36:40 +0530 Subject: [PATCH 13/15] Removed colon and capitalised --- docs/src/models/layers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 0007853a..227abe31 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -66,7 +66,7 @@ LayerNorm GroupNorm ``` -## Loss functions: +## Loss Functions ```@docs mse crossentropy From 8a93be8c6c9d4686c63284153d9cf8cf07f376a1 Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Mon, 9 Dec 2019 20:39:46 +0530 Subject: [PATCH 14/15] Change loss to cost --- docs/src/models/layers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 227abe31..5f2ab3ce 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -66,7 +66,7 @@ LayerNorm GroupNorm ``` -## Loss Functions +## Cost Functions ```@docs mse crossentropy @@ -76,4 +76,4 @@ logitbinarycrossentropy kldivergence poisson hinge -``` \ No newline at end of file +``` From 747e01ea024134b09fdf64fe83c38fb71fe98536 Mon Sep 17 00:00:00 2001 From: Manjunath Bhat Date: Mon, 13 Jan 2020 18:33:30 +0530 Subject: [PATCH 15/15] Test to check for spurious promotions --- test/layers/stateless.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl index 87c495f1..7cb8ed2e 100644 --- a/test/layers/stateless.jl +++ b/test/layers/stateless.jl @@ -75,7 +75,7 @@ const ϵ = 1e-7 for T in (Float32, Float64) y = rand(T, 2) ŷ = rand(T, 2) - for f in (mse, crossentropy, logitcrossentropy) + for f in (mse, crossentropy, logitcrossentropy, Flux.kldivergence, Flux.hinge, Flux.poisson) fwd, back = Flux.pullback(f, ŷ, y) @test fwd isa T @test eltype(back(one(T))[1]) == T