new loss interface

This commit is contained in:
CarloLucibello 2020-04-27 11:44:16 +02:00
parent 9237cdaf5b
commit fd64f4e18e


@@ -1,12 +1,14 @@
# Cost functions
"""
mae(ŷ, y)
mae(ŷ, y; agg=mean)
Return the mean of absolute error; calculated as
`sum(abs.(ŷ .- y)) / length(y)`.
Return the Mean Absolute Error.
l = abs.(ŷ .- y)
The results are aggregated by `agg` (`mean` by default).
"""
mae(ŷ, y) = sum(abs.(ŷ .- y)) * 1 // length(y)
mae(ŷ, y; agg=mean) = agg(abs.(ŷ .- y))
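A quick usage sketch of the new `agg` keyword (illustrative values, not part of the commit): the default reproduces the old mean behaviour, while passing `sum` gives the unnormalised total.

    using Flux
    ŷ, y = [0.9, 0.1, 0.3], [1.0, 0.0, 1.0]
    Flux.mae(ŷ, y)            # mean(abs.(ŷ .- y)) ≈ 0.3
    Flux.mae(ŷ, y; agg=sum)   # sum(abs.(ŷ .- y)) ≈ 0.9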
"""
mse(ŷ, y)
@@ -20,8 +22,7 @@ julia> Flux.mse([0, 2], [1, 1])
1//1
```
"""
mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
mse(ŷ, y; agg=mean) = agg((ŷ .- y).^2)
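The same pattern applies to `mse`; a small sketch showing that the docstring example above keeps its value under the default `agg=mean`:

    using Flux
    Flux.mse([0, 2], [1, 1])           # 1.0 — same value as the old 1//1, up to numeric type
    Flux.mse([0, 2], [1, 1]; agg=sum)  # 2 — unnormalised sum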
"""
msle(ŷ, y; ϵ=eps(eltype(ŷ)))
@@ -32,12 +33,12 @@ The `ϵ` term provides numerical stability.
Penalizes an under-predicted estimate more heavily than an over-predicted estimate.
"""
msle(ŷ, y; ϵ=eps(eltype(ŷ))) = sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) * 1 // length(y)
msle(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ))) = agg((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2)
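A sketch (illustrative values) of the asymmetry the docstring describes: for the same absolute error, under-prediction costs more than over-prediction.

    using Flux
    Flux.msle([0.5], [1.0])   # ≈ 0.48  (under-prediction by 0.5)
    Flux.msle([1.5], [1.0])   # ≈ 0.16  (over-prediction by 0.5)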
"""
huber_loss(ŷ, y; δ=1.0)
huber_loss(ŷ, y; δ=1)
Return the mean of the [Huber loss](https://en.wikipedia.org/wiki/Huber_loss)
given the prediction `ŷ` and true values `y`.
@@ -46,24 +47,24 @@ given the prediction `ŷ` and true values `y`.
             | 0.5 * |ŷ - y|^2,          for |ŷ - y| <= δ
Huber loss = |
             | δ * (|ŷ - y| - 0.5 * δ), otherwise
"""
function huber_loss(, y; δ=eltype(ŷ)(1))
function huber_loss(ŷ, y; agg=mean, δ=one(eltype(ŷ)))
abs_error = abs.(ŷ .- y)
temp = abs_error .< δ
x = eltype(ŷ)(0.5)
hub_loss = sum(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp)) * 1 // length(y)
agg(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp))
end
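A sketch of the two regimes (illustrative values, default δ = 1): quadratic for small residuals, linear beyond δ.

    using Flux
    Flux.huber_loss([1.2], [1.0])   # ≈ 0.5 * 0.2^2 = 0.02   (|ŷ - y| < δ)
    Flux.huber_loss([3.0], [1.0])   # ≈ 1 * (2 - 0.5) = 1.5  (|ŷ - y| ≥ δ)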
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
end
# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
# return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
# end
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
end
# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
# return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
# end
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
end
# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
# return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
# end
"""
crossentropy(ŷ, y; weight = nothing)
@@ -82,7 +83,10 @@ julia> Flux.crossentropy(softmax([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
3.085467254747739
```
"""
crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
# crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
function crossentropy(ŷ, y; dims=1, agg=mean, ϵ=eps(eltype(ŷ)))
agg(.-sum(y .* log.(ŷ .+ ϵ); dims=dims))
end
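A usage sketch for the new signature (illustrative data): with `dims=1` each column is one sample, and the per-column losses are then reduced by `agg`.

    using Flux
    ŷ = [0.1 0.8; 0.9 0.2]             # predicted class probabilities, one column per sample
    y = [0.0 1.0; 1.0 0.0]             # one-hot targets
    Flux.crossentropy(ŷ, y)            # mean over columns of -sum(y .* log.(ŷ .+ ϵ); dims=1)
    Flux.crossentropy(ŷ, y; agg=sum)   # summed over columns instead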
"""
logitcrossentropy(ŷ, y; weight = 1)
@@ -101,8 +105,11 @@ julia> Flux.logitcrossentropy([-1.1491, 0.8619, 0.3127], [1, 1, 0])
3.085467254747738
```
"""
function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
# function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
# return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
# end
function logitcrossentropy(ŷ, y; dims=1, agg=mean)
agg(.-sum(y .* logsoftmax(ŷ; dims=dims); dims=dims))
end
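A sketch of the relationship to `crossentropy` (same illustrative vector as in the docstring examples above): taking logits directly is mathematically equivalent but numerically more stable.

    using Flux
    x = [-1.1491, 0.8619, 0.3127]
    y = [1.0, 1.0, 0.0]
    Flux.logitcrossentropy(x, y) ≈ Flux.crossentropy(softmax(x), y)   # true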
"""
@@ -123,10 +130,12 @@ julia> Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
0.8616703662235441
```
"""
binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
# binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
function binarycrossentropy(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ)))
agg(@.(-y*log(ŷ+ϵ) - (1-y)*log(1-ŷ+ϵ)))
end
# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
# CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
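One behavioural change worth noting: the old definition was scalar and broadcast by the caller, while the new one aggregates internally. A small sketch (illustrative values):

    using Flux
    ŷ = σ.([-1.1491, 0.8619, 0.3127])
    y = [1.0, 1.0, 0.0]
    Flux.binarycrossentropy(ŷ, y)                 # a single aggregated scalar (mean by default)
    Flux.binarycrossentropy(ŷ, y; agg=identity)   # per-element losses, like the old broadcasted call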
"""
logitbinarycrossentropy(ŷ, y)
@@ -145,10 +154,13 @@ julia> Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
0.8616703662235443
```
"""
logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
# logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
function logitbinarycrossentropy(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ)))
agg(@.((1-y)*ŷ - logsigmoid(ŷ)))
end
# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
# CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
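As with the non-binary case, the logit form avoids evaluating `σ` separately; a sketch using the name documented above (illustrative values):

    using Flux
    x = [-1.1491, 0.8619, 0.3127]
    y = [1.0, 1.0, 0.0]
    Flux.logitbinarycrossentropy(x, y) ≈ Flux.binarycrossentropy(σ.(x), y)   # true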
"""
normalise(x; dims=1)
@@ -177,8 +189,8 @@ julia> Flux.normalise(a, dims=2)
```
"""
function normalise(x::AbstractArray; dims=1)
μ′ = mean(x, dims = dims)
σ = std(x, dims = dims, mean = μ′, corrected=false)
μ′ = mean(x, dims=dims)
σ = std(x, dims=dims, mean=μ′, corrected=false)
return (x .- μ′) ./ σ
end
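A quick check of what `normalise` guarantees along the chosen dimension (illustrative data):

    using Flux, Statistics
    a = reshape(collect(1.0:9.0), 3, 3)
    b = Flux.normalise(a, dims=1)
    mean(b, dims=1)                    # ≈ 0 for every column
    std(b, dims=1, corrected=false)    # ≈ 1 for every column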
@@ -194,21 +206,21 @@ from the other.
It is always non-negative, and zero only when both distributions are equal everywhere.
"""
function kldivergence(ŷ, y)
entropy = sum(y .* log.(y)) * 1 //size(y,2)
cross_entropy = crossentropy(ŷ, y)
function kldivergence(ŷ, y; dims=1, agg=mean, ϵ=eps(eltype(ŷ)))
entropy = agg(sum(y .* log.(y .+ ϵ), dims=dims))
cross_entropy = crossentropy(ŷ, y; dims=dims, agg=agg, ϵ=ϵ)
return entropy + cross_entropy
end
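A sketch of the basic properties stated above (illustrative distributions); note the argument order: `kldivergence(ŷ, y)` measures how `ŷ` diverges from `y`.

    using Flux
    p = [0.1, 0.9]
    q = [0.5, 0.5]
    Flux.kldivergence(q, p)   # > 0 when the distributions differ
    Flux.kldivergence(p, p)   # ≈ 0 when they coincide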
"""
poisson(ŷ, y)
Return how much the predicted distribution `ŷ` diverges from the expected Poisson
distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
# Return how much the predicted distribution `ŷ` diverges from the expected Poisson
# distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
REDO
[More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
"""
poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y,2)
poisson(ŷ, y; agg=mean) = agg(ŷ .- y .* log.(ŷ))
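The docstring is flagged for a rewrite, but the definition itself only changes in how it aggregates; a small sketch (illustrative rates and counts):

    using Flux
    ŷ = [0.5, 1.5]    # predicted Poisson rates
    y = [1.0, 2.0]    # observed counts
    Flux.poisson(ŷ, y)             # mean(ŷ .- y .* log.(ŷ))
    Flux.poisson(ŷ, y; agg=sum)    # summed instead of averaged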
"""
hinge(ŷ, y)
@@ -219,7 +231,7 @@ prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
See also: [`squared_hinge`](@ref)
"""
hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) * 1 // size(y, 2)
hinge(ŷ, y; agg=mean) = agg(max.(0, 1 .- ŷ .* y))
"""
squared_hinge(ŷ, y)
@@ -229,7 +241,7 @@ Return the squared hinge loss given the prediction `ŷ` and true labels `y`
See also: [`hinge`](@ref)
"""
squared_hinge(ŷ, y) = sum((max.(0, 1 .- ŷ .* y)).^2) * 1 // size(y, 2)
squared_hinge(ŷ, y; agg=mean) = agg((max.(0, 1 .- ŷ .* y)).^2)
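A combined sketch for `hinge` and `squared_hinge` (illustrative scores; labels must be ±1):

    using Flux
    ŷ = [0.3, -0.8, 2.0]       # raw scores
    y = [1, -1, 1]             # labels in {-1, +1}
    Flux.hinge(ŷ, y)           # mean(max.(0, 1 .- ŷ .* y)) ≈ 0.3
    Flux.squared_hinge(ŷ, y)   # mean(max.(0, 1 .- ŷ .* y).^2) ≈ 0.18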
"""
dice_coeff_loss(ŷ, y; smooth=1)
@@ -240,7 +252,7 @@ architecture.
Similar to the F1_score. Calculated as:
1 - 2*sum(|ŷ .* y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)
"""
dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth)
dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth) #TODO
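`dice_coeff_loss` still uses the old reduction (hence the #TODO); a usage sketch with illustrative mask values:

    using Flux
    ŷ = [0.8, 0.2, 0.6]          # predicted mask probabilities
    y = [1.0, 0.0, 1.0]          # ground-truth mask
    Flux.dice_coeff_loss(ŷ, y)   # 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth)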
"""
tversky_loss(ŷ, y; β=0.7)
@@ -251,13 +263,14 @@ Larger β weighs recall higher than precision (by placing more emphasis on false
Calculated as:
1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
"""
tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1) #TODO
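Likewise for `tversky_loss`, still pending the `agg` conversion; a sketch (illustrative values) showing the β trade-off described above:

    using Flux
    ŷ = [0.8, 0.2, 0.6]
    y = [1.0, 0.0, 1.0]
    Flux.tversky_loss(ŷ, y)          # default β = 0.7
    Flux.tversky_loss(ŷ, y; β=0.3)   # shifts the weighting between false positives and false negatives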
"""
flatten(x::AbstractArray)
Transform (w, h, c, b)-shaped input into (w × h × c, b)-shaped output
by linearizing all values for each element in the batch.
Reshape arbitrarily-shaped input into a matrix-shaped output
preserving the last dimension size.
Equivalent to `reshape(x, :, size(x)[end])`.
"""
function flatten(x::AbstractArray)
return reshape(x, :, size(x)[end])
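A sketch of the reshaping described by the new docstring (illustrative batch):

    using Flux
    x = rand(Float32, 28, 28, 1, 16)   # e.g. a batch of 16 single-channel 28×28 images
    size(Flux.flatten(x))              # (784, 16)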