Merge #1166
1166: Fix crossentropy when some probabilities are zero r=dhairyagandhi96 a=cossio

Use a function `xlogy(x, y) = x * log(y)` that has the correct limit at `x = 0`.

Before this PR:

```julia
julia> Flux.crossentropy([0.1,0.0,0.9], [0.1,0.0,0.9])
NaN
```

After this PR:

```julia
julia> Flux.crossentropy([0.1,0.0,0.9], [0.1,0.0,0.9])
0.3250829733914482
```

Co-authored-by: cossio <j.cossio.diaz@gmail.com>
Commit 0287abbf66
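A minimal sketch of the underlying issue, independent of Flux internals. `safe_xlogy` below is an illustrative stand-in for the `xlogy` helper this PR introduces, not existing Flux API:

```julia
# The zero-probability term contributes 0.0 * log(0.0), which is NaN in
# IEEE arithmetic, even though x * log(x) -> 0 as x -> 0 from above.
y  = [0.1, 0.0, 0.9]
ŷ  = [0.1, 0.0, 0.9]

@show 0.0 * log(0.0)           # NaN
@show -sum(y .* log.(ŷ))       # NaN, the pre-PR crossentropy sum

# Taking the limit at x = 0 explicitly, as the PR's xlogy helper does:
safe_xlogy(x, y) = iszero(x) ? zero(x * log(y)) : x * log(y)
@show -sum(safe_xlogy.(y, ŷ))  # ≈ 0.3250829733914482, the post-PR value
```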
```diff
@@ -1,6 +1,6 @@
 name = "Flux"
 uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-version = "0.10.4"
+version = "0.10.5"

 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
```
```diff
@@ -54,15 +54,15 @@ function huber_loss(ŷ, y; δ=eltype(ŷ)(1))
 end

 function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
-  return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
+  return -sum(xlogy.(y, ŷ)) * 1 // size(y, 2)
 end

 function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
-  return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
+  return -sum(xlogy.(y, ŷ)) .* weight * 1 // size(y, 2)
 end

 function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
-  return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
+  return -sum(xlogy.(y, ŷ) .* weight) * 1 // size(y, 2)
 end

 """
@@ -123,7 +123,7 @@ julia> Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
 0.8616703662235441
 ```
 """
-binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
+binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -xlogy(y, ŷ + ϵ) - xlogy(1 - y, 1 - ŷ + ϵ)

 # Re-definition to fix interaction with CuArrays.
 CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
@@ -195,7 +195,7 @@ It is always non-negative and zero only when both the distributions are equal
 everywhere.
 """
 function kldivergence(ŷ, y)
-  entropy = sum(y .* log.(y)) * 1 //size(y,2)
+  entropy = sum(xlogx.(y)) * 1 //size(y,2)
   cross_entropy = crossentropy(ŷ, y)
   return entropy + cross_entropy
 end
@@ -208,7 +208,7 @@ distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.

 [More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
 """
-poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y,2)
+poisson(ŷ, y) = sum(ŷ .- xlogy.(y, ŷ)) * 1 // size(y,2)

 """
     hinge(ŷ, y)
@@ -262,3 +262,29 @@ by linearizing all values for each element in the batch.
 function flatten(x::AbstractArray)
   return reshape(x, :, size(x)[end])
 end
+
+"""
+    xlogx(x)
+Return `x * log(x)` for `x ≥ 0`, handling `x = 0` by taking the downward limit.
+"""
+function xlogx(x)
+  result = x * log(x)
+  ifelse(iszero(x), zero(result), result)
+end
+CuArrays.@cufunc function xlogx(x)
+  result = x * log(x)
+  ifelse(iszero(x), zero(result), result)
+end
+
+"""
+    xlogy(x, y)
+Return `x * log(y)` for `y > 0` with correct limit at `x = 0`.
+"""
+function xlogy(x, y)
+  result = x * log(y)
+  ifelse(iszero(x), zero(result), result)
+end
+CuArrays.@cufunc function xlogy(x, y)
+  result = x * log(y)
+  ifelse(iszero(x), zero(result), result)
+end
```
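The new helpers compute `result = x * log(x)` (or `x * log(y)`) unconditionally and select with `ifelse` rather than branching, so the return type does not depend on the value; the `@inferred` tests below check exactly this, and the branch-free form also suits broadcasting and the `CuArrays.@cufunc` GPU overloads. A small sketch of the difference, using the hypothetical names `naive_xlogx` and `stable_xlogx` purely for illustration:

```julia
# Returning a literal 0 in the zero branch changes the result type with the
# input value; zero(result) keeps the same floating-point type as the other branch.
naive_xlogx(x)  = iszero(x) ? 0 : x * log(x)
stable_xlogx(x) = (r = x * log(x); ifelse(iszero(x), zero(r), r))

@show typeof(naive_xlogx(0.0))        # Int64: the result type depends on the value
@show typeof(stable_xlogx(0.0))       # Float64: matches the non-zero branch
@show stable_xlogx.([0.0, 0.5, 1.0])  # [0.0, -0.346..., 0.0], no NaNs
```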
```diff
@@ -1,9 +1,26 @@
 using Test
 using Flux: onehotbatch, mse, crossentropy, logitcrossentropy,
-  σ, binarycrossentropy, logitbinarycrossentropy, flatten
+  σ, binarycrossentropy, logitbinarycrossentropy, flatten,
+  xlogx, xlogy

 const ϵ = 1e-7

+@testset "xlogx & xlogy" begin
+  @test iszero(xlogx(0))
+  @test isnan(xlogx(NaN))
+  @test xlogx(2) ≈ 2.0 * log(2.0)
+  @inferred xlogx(2)
+  @inferred xlogx(0)
+
+  @test iszero(xlogy(0, 1))
+  @test isnan(xlogy(NaN, 1))
+  @test isnan(xlogy(1, NaN))
+  @test isnan(xlogy(NaN, NaN))
+  @test xlogy(2, 3) ≈ 2.0 * log(3.0)
+  @inferred xlogy(2, 3)
+  @inferred xlogy(0, 1)
+end
+
 @testset "losses" begin
   # First, regression-style y's
   y = [1, 1, 0, 0]
@@ -35,6 +52,7 @@ const ϵ = 1e-7
   lossvalue = 1.203972804325936

   @testset "crossentropy" begin
+    @test crossentropy([0.1,0.0,0.9], [0.1,0.0,0.9]) ≈ crossentropy([0.1,0.9], [0.1,0.9])
     @test crossentropy(ŷ, y) ≈ lossvalue
   end

@@ -67,6 +85,7 @@ const ϵ = 1e-7
   y = [1 2 3]
   ŷ = [4.0 5.0 6.0]
   @testset "kldivergence" begin
+    @test Flux.kldivergence([0.1,0.0,0.9], [0.1,0.0,0.9]) ≈ Flux.kldivergence([0.1,0.9], [0.1,0.9])
     @test Flux.kldivergence(ŷ, y) ≈ -1.7661057888493457
     @test Flux.kldivergence(y, y) ≈ 0
   end
```
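The added tests compare each loss on a distribution containing an explicit zero entry against the same distribution with that entry dropped; under `xlogy`, a zero-probability term contributes nothing to the sum, so the two must agree. A quick check of that property, assuming Flux 0.10.5 (i.e. with this PR applied):

```julia
using Flux

# A zero-probability entry adds 0 to the sum under xlogy, so dropping it
# leaves both losses unchanged.
@assert Flux.crossentropy([0.1, 0.0, 0.9], [0.1, 0.0, 0.9]) ≈
        Flux.crossentropy([0.1, 0.9], [0.1, 0.9])
@assert Flux.kldivergence([0.1, 0.0, 0.9], [0.1, 0.0, 0.9]) ≈
        Flux.kldivergence([0.1, 0.9], [0.1, 0.9])
```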