new loss interface

CarloLucibello 2020-04-27 11:44:16 +02:00
parent 9237cdaf5b
commit fd64f4e18e


@@ -1,12 +1,14 @@
 # Cost functions
 """
-    mae(ŷ, y)
+    mae(ŷ, y; agg=mean)
 
-Return the mean of absolute error; calculated as
-`sum(abs.(ŷ .- y)) / length(y)`.
+Return the Mean Absolute Error.
+
+    l = abs.(ŷ .- y)
+
+The results are aggregated with `agg` (defaults to `mean`).
 """
-mae(ŷ, y) = sum(abs.(ŷ .- y)) * 1 // length(y)
+mae(ŷ, y; agg=mean) = agg(abs.(ŷ .- y))
 
 """
     mse(ŷ, y)
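To make the new interface concrete, a small usage sketch (illustrative only, assuming this branch where `mae`/`mse` accept an `agg` keyword): any reducing function can be passed, and `identity` leaves the per-element losses unreduced.

```julia
using Flux

ŷ = [0.9, 2.1, 7.0]
y = [1.0, 2.0, 8.0]

Flux.mae(ŷ, y)                     # default agg=mean: mean(abs.(ŷ .- y)) ≈ 0.4
Flux.mae(ŷ, y; agg=sum)            # total absolute error instead: ≈ 1.2
Flux.mse(ŷ, y; agg=identity)       # unaggregated per-element squared errors
```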
@@ -20,8 +22,7 @@ julia> Flux.mse([0, 2], [1, 1])
 1//1
 ```
 """
-mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
+mse(ŷ, y; agg=mean) = agg((ŷ .- y).^2)
 
 """
     msle(ŷ, y; ϵ=eps(eltype(ŷ)))
@@ -32,12 +33,12 @@ The `ϵ` term provides numerical stability.
 Penalizes an under-predicted estimate greater than an over-predicted estimate.
 """
-msle(ŷ, y; ϵ=eps(eltype(ŷ))) = sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) * 1 // length(y)
+msle(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ))) = agg((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2)
 
 """
-    huber_loss(ŷ, y; δ=1.0)
+    huber_loss(ŷ, y; δ=1)
 
 Return the mean of the [Huber loss](https://en.wikipedia.org/wiki/Huber_loss)
 given the prediction `ŷ` and true values `y`.
@@ -46,24 +47,24 @@ given the prediction `ŷ` and true values `y`.
    Huber loss = |
                  |  δ * (|ŷ - y| - 0.5 * δ), otherwise
 """
-function huber_loss(ŷ, y; δ=eltype(ŷ)(1))
+function huber_loss(ŷ, y; agg=mean, δ=one(eltype(ŷ)))
   abs_error = abs.(ŷ .- y)
   temp = abs_error .< δ
   x = eltype(ŷ)(0.5)
-  hub_loss = sum(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp)) * 1 // length(y)
+  agg(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp))
 end
 
-function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
-  return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
-end
-function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
-  return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
-end
-function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
-  return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
-end
+# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
+#   return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
+# end
+# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
+#   return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
+# end
+# function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
+#   return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
+# end
 
 """
     crossentropy(ŷ, y; weight = nothing)
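A quick check of `huber_loss` against the piecewise formula in its docstring; `huber` below is a throwaway reference helper introduced only for this comparison, and the equality assumes this branch where the default aggregation is `mean`.

```julia
using Flux
using Statistics: mean

ŷ = [0.2, 1.8, 5.0]
y = [0.0, 2.0, 2.0]          # the last error (3.0) exceeds δ = 1

# reference implementation of the piecewise Huber formula above
huber(e, δ=1.0) = abs(e) <= δ ? 0.5 * e^2 : δ * (abs(e) - 0.5 * δ)

Flux.huber_loss(ŷ, y) ≈ mean(huber.(ŷ .- y))   # expected to hold on this branch
```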
@@ -82,7 +83,10 @@ julia> Flux.crossentropy(softmax([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
 3.085467254747739
 ```
 """
-crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
+# crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
+function crossentropy(ŷ, y; dims=1, agg=mean, ϵ=eps(eltype(ŷ)))
+  agg(.-sum(y .* log.(ŷ .+ ϵ); dims=dims))
+end
 
 """
     logitcrossentropy(ŷ, y; weight = 1)
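A usage sketch for the rewritten `crossentropy` (assuming this branch): inputs are class-probability columns, the class dimension is reduced with `dims`, and `agg` reduces over the batch.

```julia
using Flux

ŷ = Flux.softmax(randn(5, 8))             # 5 classes × 8 samples, columns sum to 1
y = Flux.onehotbatch(rand(1:5, 8), 1:5)   # one-hot targets of the same shape

Flux.crossentropy(ŷ, y)            # sum over classes (dims=1), then mean over samples
Flux.crossentropy(ŷ, y; agg=sum)   # total loss over the batch instead
```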
@@ -101,8 +105,11 @@ julia> Flux.logitcrossentropy([-1.1491, 0.8619, 0.3127], [1, 1, 0])
 3.085467254747738
 ```
 """
-function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
-  return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
+# function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
+#   return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
+# end
+function logitcrossentropy(ŷ, y; dims=1, agg=mean)
+  agg(.-sum(y .* logsoftmax(ŷ; dims=dims); dims=dims))
 end
 
 """
@@ -123,10 +130,12 @@ julia> Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
 0.8616703662235441
 ```
 """
-binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
+# binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
+function binarycrossentropy(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ)))
+  agg(@.(-y*log(ŷ+ϵ) - (1-y)*log(1-ŷ+ϵ)))
+end
 
 # Re-definition to fix interaction with CuArrays.
-CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
+# CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
 
 """
     logitbinarycrossentropy(ŷ, y)
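A sketch of the changed calling convention for `binarycrossentropy` (the old API was applied with broadcast; on this branch the function reduces internally, `mean` by default):

```julia
using Flux

ŷ = Flux.σ.([-1.1491, 0.8619, 0.3127])   # probabilities in (0, 1)
y = [1, 1, 0]

Flux.binarycrossentropy(ŷ, y)            # old style was binarycrossentropy.(ŷ, y)
Flux.binarycrossentropy(ŷ, y; agg=sum)   # summed over the batch instead
```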
@@ -145,10 +154,13 @@ julia> Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
 0.8616703662235443
 ```
 """
-logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
+# logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
+function logitbinarycrossentropy(ŷ, y; agg=mean)
+  agg(@.((1-y)*ŷ - logsigmoid(ŷ)))
+end
 
 # Re-definition to fix interaction with CuArrays.
-CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
+# CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
 
 """
     normalise(x; dims=1)
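Assuming the renamed definition above, `logitbinarycrossentropy` should agree with `binarycrossentropy` applied after `σ`, up to the `ϵ` term:

```julia
using Flux

x = [-1.1491, 0.8619, 0.3127]   # raw scores
y = [1, 1, 0]

# fused and numerically safer than applying σ first
Flux.logitbinarycrossentropy(x, y) ≈ Flux.binarycrossentropy(Flux.σ.(x), y)
```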
@@ -177,8 +189,8 @@ julia> Flux.normalise(a, dims=2)
 ```
 """
 function normalise(x::AbstractArray; dims=1)
-  μ′ = mean(x, dims = dims)
-  σ = std(x, dims = dims, mean = μ′, corrected=false)
+  μ′ = mean(x, dims=dims)
+  σ = std(x, dims=dims, mean=μ′, corrected=false)
   return (x .- μ′) ./ σ
 end
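A small check of what `normalise` produces along the chosen `dims` (illustrative only):

```julia
using Flux
using Statistics: mean, std

x = rand(3, 4)
z = Flux.normalise(x; dims=2)        # normalise every row over its columns

mean(z, dims=2)                      # ≈ 0 for each row
std(z, dims=2, corrected=false)      # ≈ 1 for each row (uncorrected, as above)
```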
@@ -194,21 +206,21 @@ from the other.
 It is always non-negative and zero only when both the distributions are equal
 everywhere.
 """
-function kldivergence(ŷ, y)
-  entropy = sum(y .* log.(y)) * 1 // size(y,2)
-  cross_entropy = crossentropy(ŷ, y)
+function kldivergence(ŷ, y; dims=1, agg=mean, ϵ=eps(eltype(ŷ)))
+  entropy = agg(sum(y .* log.(y .+ ϵ), dims=dims))
+  cross_entropy = crossentropy(ŷ, y; dims=dims, agg=agg, ϵ=ϵ)
   return entropy + cross_entropy
 end
 
 """
     poisson(ŷ, y)
 
-Return how much the predicted distribution `ŷ` diverges from the expected Poisson
-distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
+Return how much the predicted distribution `ŷ` diverges from the expected Poisson
+distribution `y`, aggregated with `agg`.
 
 [More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
 """
-poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y,2)
+poisson(ŷ, y; agg=mean) = agg(ŷ .- y .* log.(ŷ))
 
 """
     hinge(ŷ, y)
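A sketch of the behaviour implied by the `kldivergence` definition above: non-negative in general, and (numerically) zero when prediction and target coincide.

```julia
using Flux

p = Flux.softmax(randn(5, 10))   # target distributions, one per column
q = Flux.softmax(randn(5, 10))   # predicted distributions

Flux.kldivergence(q, p)                          # ≥ 0, grows as q moves away from p
isapprox(Flux.kldivergence(p, p), 0; atol=1e-8)  # zero when the two coincide
```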
@@ -219,7 +231,7 @@ prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
 
 See also: [`squared_hinge`](@ref)
 """
-hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) * 1 // size(y, 2)
+hinge(ŷ, y; agg=mean) = agg(max.(0, 1 .- ŷ .* y))
 
 """
     squared_hinge(ŷ, y)
@@ -229,7 +241,7 @@ Return the squared hinge loss given the prediction `ŷ` and true labels `y`
 
 See also: [`hinge`](@ref)
 """
-squared_hinge(ŷ, y) = sum((max.(0, 1 .- ŷ .* y)).^2) * 1 // size(y, 2)
+squared_hinge(ŷ, y; agg=mean) = agg((max.(0, 1 .- ŷ .* y)).^2)
 
 """
     dice_coeff_loss(ŷ, y; smooth=1)
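Usage sketch for the hinge losses with the new `agg` keyword (labels are expected in {-1, +1}; illustrative only):

```julia
using Flux

ŷ = [0.3, -1.2, 2.0]   # raw scores
y = [1, -1, -1]        # labels in {-1, +1}

Flux.hinge(ŷ, y)                    # mean(max.(0, 1 .- ŷ .* y))
Flux.squared_hinge(ŷ, y; agg=sum)   # squared margins, summed over the batch
```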
@@ -240,7 +252,7 @@ architecture.
 Similar to the F1_score. Calculated as:
     1 - 2*sum(|ŷ .* y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)
 """
-dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth)
+dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth) #TODO
 
 """
     tversky_loss(ŷ, y; β=0.7)
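A small sanity check of `dice_coeff_loss` as defined above (note it has no `agg` yet, per the `#TODO`): the loss is small for good overlap and zero for perfect overlap.

```julia
using Flux

ŷ = [0.9, 0.1, 0.8, 0.2]   # predicted mask probabilities
y = [1.0, 0.0, 1.0, 0.0]   # ground-truth mask

Flux.dice_coeff_loss(ŷ, y)       # small when the predicted mask overlaps y well
Flux.dice_coeff_loss(y, y) ≈ 0   # perfect overlap gives zero loss
```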
@@ -251,13 +263,14 @@ Larger β weigh recall higher than precision (by placing more emphasis on false
 Calculated as:
     1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
 """
-tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
+tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1) #TODO
 
 """
     flatten(x::AbstractArray)
 
-Transform (w, h, c, b)-shaped input into (w × h × c, b)-shaped output
-by linearizing all values for each element in the batch.
+Reshape arbitrarily-shaped input into a matrix-shaped output,
+preserving the size of the last dimension.
+Equivalent to `reshape(x, :, size(x)[end])`.
 """
 function flatten(x::AbstractArray)
   return reshape(x, :, size(x)[end])
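A usage sketch for `flatten`, which collapses everything except the last (batch) dimension:

```julia
using Flux

x = rand(Float32, 28, 28, 1, 16)   # e.g. a batch of 16 single-channel 28×28 images
size(Flux.flatten(x))              # (784, 16): all but the batch dimension collapsed
```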