Compare commits

...

8 Commits

Author SHA1 Message Date
Carlo Lucibello 2290ced09a
Update src/layers/stateless.jl
Co-authored-by: cossio <cossio@users.noreply.github.com>
2020-05-05 16:42:06 +02:00
CarloLucibello 79391beca0 more docs 2020-04-30 12:26:58 +02:00
CarloLucibello b44ba162b1 fix tests 2020-04-30 12:11:15 +02:00
CarloLucibello 654b100ce3 update 2020-04-30 10:39:28 +02:00
CarloLucibello 508b392204 fixes 2020-04-29 12:31:59 +02:00
CarloLucibello 20ed5c5622 more 2020-04-29 11:52:24 +02:00
CarloLucibello 5f1604d25d stuff 2020-04-27 17:17:23 +02:00
CarloLucibello fd64f4e18e new loss interface 2020-04-27 11:44:16 +02:00
12 changed files with 285 additions and 256 deletions

View File

@ -8,9 +8,9 @@ version = "0.5.0"
[[AbstractTrees]]
deps = ["Markdown"]
git-tree-sha1 = "86d092c2599f1f7bb01668bf8eb3412f98d61e47"
git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45"
uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
version = "0.3.2"
version = "0.3.3"
[[Adapt]]
deps = ["LinearAlgebra"]
@ -20,18 +20,18 @@ version = "1.0.1"
[[ArrayLayouts]]
deps = ["FillArrays", "LinearAlgebra"]
git-tree-sha1 = "41956a49a8a4fefa1bf6664bca4a3035aba4c3a0"
git-tree-sha1 = "5a57a6158c1d340635a89d19beb34b0f325a4431"
uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
version = "0.2.3"
version = "0.2.5"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[BinaryProvider]]
deps = ["Libdl", "SHA"]
git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "428e9106b1ff27593cbd979afac9b45b82372b8c"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.8"
version = "0.5.9"
[[CEnum]]
git-tree-sha1 = "62847acab40e6855a9b5905ccb99c2b5cf6b3ebb"
@ -46,21 +46,21 @@ version = "4.0.0"
[[CUDAdrv]]
deps = ["CEnum", "CUDAapi", "Printf"]
git-tree-sha1 = "e650cbaee92b60433313157926b1e80d0c3a0e2e"
git-tree-sha1 = "17248da4169c0cdd1699da542f8e110fe4168af6"
uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde"
version = "6.2.2"
version = "6.2.3"
[[CUDAnative]]
deps = ["Adapt", "BinaryProvider", "CEnum", "CUDAapi", "CUDAdrv", "Cthulhu", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "MacroTools", "Pkg", "Printf", "TimerOutputs"]
git-tree-sha1 = "d1fc99635d0002c8a819b78cb1f441eb44310725"
deps = ["Adapt", "BinaryProvider", "CEnum", "CUDAapi", "CUDAdrv", "Cthulhu", "DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Pkg", "Printf", "TimerOutputs"]
git-tree-sha1 = "0da071ed49a6f5f62d5164de071daa07cedaa1e6"
uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
version = "3.0.2"
version = "3.0.4"
[[CodeTracking]]
deps = ["InteractiveUtils", "UUIDs"]
git-tree-sha1 = "0becdab7e6fbbcb7b88d8de5b72e5bb2f28239f3"
git-tree-sha1 = "c8f94de86731698373f3c82a8aa40d8ab765c50c"
uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
version = "0.5.8"
version = "0.5.9"
[[CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
@ -70,9 +70,9 @@ version = "0.7.0"
[[ColorTypes]]
deps = ["FixedPointNumbers", "Random"]
git-tree-sha1 = "c4c1cca28748906265ed62c788d6fe6f0134d264"
git-tree-sha1 = "f746d4fc892fdf683b5c22064c8e99b2f5b990e7"
uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
version = "0.10.0"
version = "0.10.2"
[[Colors]]
deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"]
@ -94,26 +94,26 @@ version = "0.3.3+0"
[[Cthulhu]]
deps = ["CodeTracking", "InteractiveUtils", "REPL", "Unicode"]
git-tree-sha1 = "484790098c85c26f8e59051f8ff1a0745c034a7d"
git-tree-sha1 = "a4849ec61df9659423cc63b298ed895904ee9743"
uuid = "f68482b8-f384-11e8-15f7-abe071a5a75f"
version = "1.0.1"
version = "1.0.2"
[[CuArrays]]
deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"]
git-tree-sha1 = "e8c55b38dcca955f5aed8ec4479cdc95810db1e1"
git-tree-sha1 = "ad04351946e2ee59a0f1295de28a750dc4917704"
uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
version = "2.0.1"
version = "2.1.0"
[[DataAPI]]
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32"
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
version = "1.1.0"
version = "1.3.0"
[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "73eb18320fe3ba58790c8b8f6f89420f0a622773"
git-tree-sha1 = "6166ecfaf2b8bbf2b68d791bc1d54501f345d314"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.11"
version = "0.17.15"
[[Dates]]
deps = ["Printf"]
@ -139,6 +139,11 @@ version = "1.0.1"
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[ExprTools]]
git-tree-sha1 = "6f0517056812fd6aa3af23d4b70d5325a2ae4e95"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.1"
[[FillArrays]]
deps = ["LinearAlgebra", "Random", "SparseArrays"]
git-tree-sha1 = "51cc2f9bc4eb9c6c0e81ec2f779d1085583cc956"
@ -158,15 +163,15 @@ version = "0.10.10"
[[GPUArrays]]
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"]
git-tree-sha1 = "d586762b08dcda13228df8967119b9cb6f22ade5"
git-tree-sha1 = "c63cb01e3b6f48ab39f1e35c31ba870650814a18"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "3.1.0"
version = "3.2.0"
[[IRTools]]
deps = ["InteractiveUtils", "MacroTools", "Test"]
git-tree-sha1 = "1a4355e4b5b50be2311ebb644f34f3306dbd0410"
git-tree-sha1 = "8845400bd2d9815d37720251f1b53d27a335e1f4"
uuid = "7869d1d1-7146-5819-86e3-90919afe41df"
version = "0.3.1"
version = "0.3.2"
[[InteractiveUtils]]
deps = ["Markdown"]
@ -305,9 +310,9 @@ version = "0.10.0"
[[StaticArrays]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "5a3bcb6233adabde68ebc97be66e95dcb787424c"
git-tree-sha1 = "4118cba3529e99af61aea9a83f7bfd3cff5ffb28"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "0.12.1"
version = "0.12.2"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
@ -325,9 +330,9 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[TimerOutputs]]
deps = ["Printf"]
git-tree-sha1 = "311765af81bbb48d7bad01fb016d9c328c6ede03"
git-tree-sha1 = "0cc8db57cb537191b02948d4fabdc09eb7f31f98"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.3"
version = "0.5.5"
[[TranscodingStreams]]
deps = ["Random", "Test"]
@ -356,9 +361,9 @@ version = "1.2.11+9"
[[Zygote]]
deps = ["AbstractFFTs", "ArrayLayouts", "DiffRules", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"]
git-tree-sha1 = "1ccbfbe8930376e31752b812daa2532c723dc332"
git-tree-sha1 = "f7b0f77a86d2434abf693e3c0330e4682deed28d"
uuid = "e88e6eb3-aa80-5325-afca-941959d7151f"
version = "0.4.13"
version = "0.4.18"
[[ZygoteRules]]
deps = ["MacroTools"]

View File

@ -8,8 +8,9 @@ makedocs(modules=[Flux, NNlib],
"Building Models" =>
["Basics" => "models/basics.md",
"Recurrence" => "models/recurrence.md",
"Regularisation" => "models/regularisation.md",
"Model Reference" => "models/layers.md",
"Loss Functions" => "models/losses.md",
"Regularisation" => "models/regularisation.md",
"Advanced Model Building" => "models/advanced.md",
"NNlib" => "models/nnlib.md"],
"Handling Data" =>

View File

@ -67,22 +67,4 @@ Many normalisation layers behave differently under training and inference (testi
```@docs
Flux.testmode!
trainmode!
```
## Cost Functions
```@docs
Flux.mae
Flux.mse
Flux.msle
Flux.huber_loss
Flux.crossentropy
Flux.logitcrossentropy
Flux.binarycrossentropy
Flux.logitbinarycrossentropy
Flux.kldivergence
Flux.poisson
Flux.hinge
Flux.squared_hinge
Flux.dice_coeff_loss
Flux.tversky_loss
```
```

36
docs/src/models/losses.md Normal file
View File

@ -0,0 +1,36 @@
## Loss Functions
Flux provides a large number of common loss functions used for training machine learning models.
Loss functions for supervised learning typically expect as inputs a target `y`, and a prediction `ŷ`.
In Flux's convention, the order of the arguments is the following
```julia
loss(ŷ, y)
```
Most loss functions in Flux have an optional argument `agg`, denoting the type of aggregation performed over the
batch:
```julia
loss(ŷ, y) # defaults to `mean`
loss(ŷ, y, agg=sum) # use `sum` for reduction
loss(ŷ, y, agg=x->sum(x, dims=2)) # partial reduction
loss(ŷ, y, agg=x->mean(w .* x)) # weighted mean
loss(ŷ, y, agg=identity) # no aggregation.
```
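For instance, with `mse` and some made-up numbers (purely illustrative, assuming the interface above):
```julia
ŷ, y = [2.0, 4.0], [1.0, 2.0]

Flux.mse(ŷ, y)           # == 2.5  (mean of the squared errors 1 and 4)
Flux.mse(ŷ, y, agg=sum)  # == 5.0  (total instead of mean)
```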
### Losses Reference
```@docs
Flux.mae
Flux.mse
Flux.msle
Flux.huber_loss
Flux.crossentropy
Flux.logitcrossentropy
Flux.binarycrossentropy
Flux.logitbinarycrossentropy
Flux.kldivergence
Flux.poisson_loss
Flux.hinge
Flux.squared_hinge
Flux.dice_coeff_loss
Flux.tversky_loss
```

View File

@ -7,9 +7,10 @@ add the result to the overall loss.
For example, say we have a simple regression.
```julia
using Flux: crossentropy
using Flux
using Flux: logitcrossentropy
m = Dense(10, 5)
loss(x, y) = crossentropy(softmax(m(x)), y)
loss(x, y) = logitcrossentropy(m(x), y)
```
We can regularise this by taking the (L2) norm of the parameters, `m.W` and `m.b`.
@ -18,19 +19,19 @@ We can regularise this by taking the (L2) norm of the parameters, `m.W` and `m.b
using LinearAlgebra
penalty() = norm(m.W) + norm(m.b)
loss(x, y) = crossentropy(softmax(m(x)), y) + penalty()
loss(x, y) = logitcrossentropy(m(x), y) + penalty()
```
When working with layers, Flux provides the `params` function to grab all
parameters at once. We can easily penalise everything with `sum(norm, params)`.
parameters at once. We can easily penalise everything with `sum`:
```julia
julia> params(m)
julia> Flux.params(m)
2-element Array{Any,1}:
param([0.355408 0.533092; … 0.430459 0.171498])
param([0.0, 0.0, 0.0, 0.0, 0.0])
julia> sum(norm, params(m))
julia> sum(norm, Flux.params(m))
26.01749952921026
```
@ -40,9 +41,9 @@ Here's a larger example with a multi-layer perceptron.
m = Chain(
Dense(28^2, 128, relu),
Dense(128, 32, relu),
Dense(32, 10), softmax)
Dense(32, 10))
loss(x, y) = crossentropy(m(x), y) + sum(norm, params(m))
loss(x, y) = logitcrossentropy(m(x), y) + sum(norm, Flux.params(m))
loss(rand(28^2), rand(10))
```
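The regularised loss differentiates like any other. A minimal sketch of taking gradients with the implicit-parameters API, reusing the `m` and `loss` defined above (the input and target here are random placeholders, purely for illustration):
```julia
using Flux

# Gradients of the penalised loss with respect to the model's parameters.
gs = gradient(() -> loss(rand(28^2), rand(10)), Flux.params(m))
```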

View File

@ -31,6 +31,7 @@ include("onehot.jl")
include("functor.jl")
include("layers/stateless.jl")
include("layers/losses.jl")
include("layers/basic.jl")
include("layers/conv.jl")
include("layers/recurrent.jl")

View File

@ -1,2 +1,3 @@
@deprecate param(x) x
@deprecate data(x) x
@deprecate poisson poisson_loss

0
src/layers/losses.jl Normal file
View File

View File

@ -1,43 +1,35 @@
# Cost functions
"""
mae(ŷ, y)
mae(ŷ, y; agg=mean)
Return the mean of absolute error; calculated as
`sum(abs.(ŷ .- y)) / length(y)`.
Return the loss corresponding to mean absolute error:
agg(abs.(ŷ .- y))
"""
mae(ŷ, y) = sum(abs.(ŷ .- y)) * 1 // length(y)
mae(ŷ, y; agg=mean) = agg(abs.(ŷ .- y))
"""
mse(ŷ, y)
mse(ŷ, y; agg=mean)
Return the mean squared error between ŷ and y; calculated as
`sum((ŷ .- y).^2) / length(y)`.
# Examples
```jldoctest
julia> Flux.mse([0, 2], [1, 1])
1//1
```
Return the loss corresponding to mean square error:
agg((ŷ .- y).^2)
"""
mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y)
mse(ŷ, y; agg=mean) = agg((ŷ .- y).^2)
"""
msle(ŷ, y; ϵ=eps(eltype(ŷ)))
msle(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ)))
The loss corresponding to mean squared logarithmic errors, calculated as
agg((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2)
Return the mean of the squared logarithmic errors; calculated as
`sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) / length(y)`.
The `ϵ` term provides numerical stability.
Penalizes an under-predicted estimate greater than an over-predicted estimate.
Penalizes an under-predicted estimate more than an over-predicted estimate.
"""
msle(ŷ, y; ϵ=eps(eltype(ŷ))) = sum((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2) * 1 // length(y)
msle(ŷ, y; agg=mean, ϵ=epseltype(ŷ)) = agg((log.(ŷ .+ ϵ) .- log.(y .+ ϵ)).^2)
"""
huber_loss(ŷ, y; δ=1.0)
huber_loss(ŷ, y; δ=1, agg=mean)
Return the mean of the [Huber loss](https://en.wikipedia.org/wiki/Huber_loss)
given the prediction `ŷ` and true values `y`.
@ -46,110 +38,188 @@ given the prediction `ŷ` and true values `y`.
                 | 0.5 * |ŷ - y|^2,          for |ŷ - y| <= δ
    Huber loss = |
                 |  δ * (|ŷ - y| - 0.5 * δ), otherwise
"""
function huber_loss(ŷ, y; δ=eltype(ŷ)(1))
function huber_loss(ŷ, y; agg=mean, δ=ofeltype(ŷ, 1))
abs_error = abs.(ŷ .- y)
temp = abs_error .< δ
x = eltype(ŷ)(0.5)
hub_loss = sum(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp)) * 1 // length(y)
x = ofeltype(ŷ, 0.5)
agg(((abs_error.^2) .* temp) .* x .+ δ*(abs_error .- x*δ) .* (1 .- temp))
end
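# Illustrative sanity check of the two branches (values assumed, not from the PR):
# with δ = 1, an absolute error of 0.5 stays in the quadratic branch,
# 0.5 * 0.5^2 == 0.125, while an error of 2 falls in the linear branch,
# 1 * (2 - 0.5 * 1) == 1.5, so huber_loss([0.5, 2], [0, 0]) == mean([0.125, 1.5]).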
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing)
return -sum(y .* log.(ŷ)) * 1 // size(y, 2)
end
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Number)
return -sum(y .* log.(ŷ)) .* weight * 1 // size(y, 2)
end
function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::AbstractVector)
return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2)
end
wsum(w::Nothing, x; dims) = sum(x, dims=dims)
wsum(w::Number, x; dims) = w .* sum(x, dims=dims)
wsum(w::AbstractArray, x; dims) = sum( w .* x, dims=dims)
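# For example (illustrative only): `wsum(nothing, x; dims=1)` is just `sum(x, dims=1)`,
# while `wsum([1, 2], x; dims=1)` scales the rows of `x` by the weights before summing.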
"""
crossentropy(ŷ, y; weight = nothing)
crossentropy(ŷ, y; weight=nothing, dims=1, ϵ=eps(eltype(ŷ)),
logits=false, agg=mean)
Return the cross entropy between the given probability distributions;
calculated as `-sum(y .* log.(ŷ) .* weight) / size(y, 2)`.
calculated as
`weight` can be `Nothing`, a `Number` or an `AbstractVector`.
agg(.-sum(weight .* y .* log.(ŷ .+ ϵ); dims=dims))
`weight` can be `nothing`, a number or an array.
`weight=nothing` acts like `weight=1` but is faster.
If `logits=true`, the input `ŷ` is first fed to a [`softmax`](@ref) layer.
See also: [`Flux.logitcrossentropy`](@ref), [`Flux.binarycrossentropy`](@ref), [`Flux.logitbinarycrossentropy`](@ref)
# Examples
```jldoctest
julia> Flux.crossentropy(softmax([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
3.085467254747739
```
"""
crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight)
"""
logitcrossentropy(ŷ, y; weight = 1)
Return the crossentropy computed after a [`Flux.logsoftmax`](@ref) operation;
calculated as `-sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2)`.
`logitcrossentropy(ŷ, y)` is mathematically equivalent to
[`Flux.crossentropy(softmax(log(ŷ)), y)`](@ref) but it is more numerically stable.
See also: [`Flux.crossentropy`](@ref), [`Flux.binarycrossentropy`](@ref), [`Flux.logitbinarycrossentropy`](@ref)
# Examples
```jldoctest
julia> Flux.logitcrossentropy([-1.1491, 0.8619, 0.3127], [1, 1, 0])
3.085467254747738
```
"""
function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1)
return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2)
function crossentropy(ŷ, y; dims=1, agg=mean, ϵ=epseltype(ŷ),
weight=nothing, logits=false)
if logits
return logitcrossentropy(ŷ, y; dims=dims, agg=agg, weight=weight)
end
agg(.-wsum(weight, y .* log.(ŷ .+ ϵ); dims=dims))
end
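# Illustrative calling patterns for the keywords above (values assumed):
#   crossentropy(ŷ, y)                    # mean over the batch, summing along dims=1
#   crossentropy(ŷ, y; agg=sum)           # total loss instead of the mean
#   crossentropy(ŷ, y; weight=[1, 2, 1])  # per-class weights along `dims`
#   crossentropy(ŷ, y; logits=true)       # equivalent to logitcrossentropy(ŷ, y)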
"""
binarycrossentropy(ŷ, y; ϵ=eps(ŷ))
logitcrossentropy(ŷ, y; weight=nothing, agg=mean, dims=1)
Return the crossentropy computed after a [`Flux.logsoftmax`](@ref) operation;
calculated as
agg(.-sum(weight .* y .* logsoftmax(ŷ; dims=dims); dims=dims))
`logitcrossentropy(ŷ, y)` is mathematically equivalent to
[`Flux.crossentropy(softmax(log.(ŷ)), y)`](@ref) but it is more numerically stable.
See also: [`Flux.crossentropy`](@ref), [`Flux.binarycrossentropy`](@ref), [`Flux.logitbinarycrossentropy`](@ref)
"""
function logitcrossentropy(ŷ, y; dims=1, agg=mean, weight=nothing)
agg(.-wsum(weight, y .* logsoftmax(ŷ; dims=dims); dims=dims))
end
"""
binarycrossentropy(ŷ, y; agg=mean, ϵ=epseltype(ŷ), logits=false)
Return ``-y*\\log(ŷ + ϵ) - (1-y)*\\log(1-ŷ + ϵ)``. The `ϵ` term provides numerical stability.
Typically, the prediction `ŷ` is given by the output of a [`sigmoid`](@ref) activation.
If `logits=true`, the input `ŷ` is first fed to a [`sigmoid`](@ref) activation.
See also: [`Flux.crossentropy`](@ref), [`Flux.logitcrossentropy`](@ref), [`Flux.logitbinarycrossentropy`](@ref)
# Examples
```jldoctest
julia> Flux.binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0])
3-element Array{Float64,1}:
1.424397097347566
0.35231664672364077
0.8616703662235441
```
"""
binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ)
function binarycrossentropy(ŷ, y; agg=mean, ϵ=epseltype(ŷ), logits=false)
if logits
return logitbinarycrossentropy(ŷ, y; agg=agg)
end
agg(@.(-y*log(ŷ+ϵ) - (1-y)*log(1-ŷ+ϵ)))
end
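# Illustrative usage (values assumed): for raw scores z and targets y,
#   binarycrossentropy(σ.(z), y)           # expects probabilities
#   binarycrossentropy(z, y; logits=true)  # same quantity computed from the raw logits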
"""
logitbinarycrossentropy(ŷ, y)
logitbinarycrossentropy(ŷ, y; agg=mean)
`logitbinarycrossentropy(ŷ, y)` is mathematically equivalent to
[`Flux.binarycrossentropy(σ(log(ŷ)), y)`](@ref) but it is more numerically stable.
See also: [`Flux.crossentropy`](@ref), [`Flux.logitcrossentropy`](@ref), [`Flux.binarycrossentropy`](@ref)
# Examples
```jldoctest
julia> Flux.logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0])
3-element Array{Float64,1}:
1.4243970973475661
0.35231664672364094
0.8616703662235443
```
"""
logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
function logitbinarycrossentropy(ŷ, y; agg=mean)
agg(@.((1-y)*ŷ - logsigmoid(ŷ)))
end
# Re-definition to fix interaction with CuArrays.
CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ)
"""
kldivergence(ŷ, y; dims=1, agg=mean, ϵ=eps(eltype(ŷ)))
Return the [Kullback-Leibler divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence)
between the given arrays interpreted as probability distributions.
KL divergence is a measure of how much one probability distribution is different
from the other.
It is always non-negative and zero only when both the distributions are equal
everywhere.
"""
function kldivergence(ŷ, y; dims=1, agg=mean, ϵ=epseltype(ŷ))
entropy = agg(sum(y .* log.(y .+ ϵ), dims=dims))
cross_entropy = crossentropy(ŷ, y; dims=dims, agg=agg, ϵ=ϵ)
return entropy + cross_entropy
end
"""
poisson_loss(ŷ, y; agg=mean, ϵ=eps(eltype(ŷ)))
Loss function derived from likelihood for a Poisson random variable with mean
`ŷ` to take value `y`. It is given by
agg(ŷ .- y .* log.(ŷ .+ ϵ))
[More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
"""
poisson_loss(ŷ, y; agg=mean, ϵ=epseltype(ŷ)) = agg(ŷ .- y .* log.(ŷ .+ ϵ))
"""
hinge(ŷ, y; agg=mean)
Return the [hinge loss](https://en.wikipedia.org/wiki/Hinge_loss) given the
prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
agg(max.(0, 1 .- ŷ .* y))
See also: [`squared_hinge`](@ref)
"""
hinge(ŷ, y; agg=mean) = agg(max.(0, 1 .- ŷ .* y))
"""
squared_hinge(ŷ, y; agg=mean)
Return the squared hinge loss given the prediction `ŷ` and true labels `y`
(containing 1 or -1); calculated as
agg(max.(0, 1 .- ŷ .* y).^2)
See also: [`hinge`](@ref)
"""
squared_hinge(ŷ, y; agg=mean) = agg(max.(0, 1 .- ŷ .* y).^2)
"""
dice_coeff_loss(ŷ, y; smooth=1, dims=size(ŷ)[1:end-1], agg=mean)
Return a loss based on the Dice coefficient.
Used in the [V-Net](https://arxiv.org/pdf/1606.04797v1.pdf) architecture
for image segmentation.
Current implementation only works for the binary segmentation case.
The arrays `ŷ` and `y` contain the predicted and true probabilities respectively
for the foreground to be present in a certain pixel.
The loss is computed as
1 - (2*sum(ŷ .* y; dims) .+ smooth) ./ (sum(ŷ.^2 .+ y.^2; dims) .+ smooth)
and then aggregated with `agg` over the batch.
"""
function dice_coeff_loss(ŷ, y; smooth=ofeltype(ŷ, 1),
dims=size(ŷ)[1:end-1],
agg=mean)
f = x -> sum(x, dims=dims)
agg(1 .- (2 .* f(y .* ŷ) .+ smooth) ./ (f(y.^2 + ŷ.^2) .+ smooth))
end
"""
tversky_loss(ŷ, y; β=0.7, α=1-β, dims=size(ŷ)[1:end-1], agg=mean)
Return the [Tversky loss](https://arxiv.org/pdf/1706.05721.pdf)
for binary classification.
The arrays `ŷ` and `y` contain the predicted and true probabilities respectively.
Used with imbalanced data to give more weight to false negatives.
Larger `β` weigh recall higher than precision (by placing more emphasis on false negatives)
Calculated as:
num = sum(y .* ŷ, dims=dims)
den = sum(@.(ŷ*y + α*ŷ*(1-y) + β*(1-ŷ)*y), dims=dims)
tversky_loss = 1 - num/den
and then aggregated with `agg` over the batch.
When `α+β=1`, it is equal to `1-F_β`, where `F_β` is an F-score.
"""
function tversky_loss(ŷ, y; β=ofeltype(ŷ, 0.7), α=1-β, dims=size(ŷ)[1:end-1], agg=mean)
f = x -> sum(x, dims=dims)
agg(1 .- f(ŷ .* y) ./ f(@.(ŷ*y + α*ŷ*(1-y) + β*(1-ŷ)*y)))
end
# TODO normalise over last dimension is typically what you want to do.
# Possible deprecation path: `normalise(x; dims=1)` -> `normalise(x; dims)` -> `normalise(x; dims=size(x)[end])`
"""
normalise(x; dims=1)
@ -176,89 +246,18 @@ julia> Flux.normalise(a, dims=2)
-1.22474 0.0 1.22474
```
"""
function normalise(x::AbstractArray; dims=1)
μ′ = mean(x, dims = dims)
σ = std(x, dims = dims, mean = μ′, corrected=false)
return (x .- μ′) ./ σ
function normalise(x::AbstractArray; dims=1, ϵ=ofeltype(x, 1e-6))
μ′ = mean(x, dims=dims)
# σ = std(x, dims=dims, mean=μ′, corrected=false) # use this when #478 gets merged
σ = std(x, dims=dims, corrected=false)
return (x .- μ′) ./ (σ.+ ϵ)
end
"""
kldivergence(ŷ, y)
Return the
[Kullback-Leibler divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence)
between the given probability distributions.
KL divergence is a measure of how much one probability distribution is different
from the other.
It is always non-negative and zero only when both the distributions are equal
everywhere.
"""
function kldivergence(ŷ, y)
entropy = sum(y .* log.(y)) * 1 //size(y,2)
cross_entropy = crossentropy(ŷ, y)
return entropy + cross_entropy
end
"""
poisson(ŷ, y)
Return how much the predicted distribution `ŷ` diverges from the expected Poisson
distribution `y`; calculated as `sum(ŷ .- y .* log.(ŷ)) / size(y, 2)`.
[More information.](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
"""
poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y,2)
"""
hinge(ŷ, y)
Return the [hinge loss](https://en.wikipedia.org/wiki/Hinge_loss) given the
prediction `ŷ` and true labels `y` (containing 1 or -1); calculated as
`sum(max.(0, 1 .- ŷ .* y)) / size(y, 2)`.
See also: [`squared_hinge`](@ref)
"""
hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) * 1 // size(y, 2)
"""
squared_hinge(ŷ, y)
Return the squared hinge loss given the prediction `ŷ` and true labels `y`
(containing 1 or -1); calculated as `sum((max.(0, 1 .- ŷ .* y)).^2) / size(y, 2)`.
See also: [`hinge`](@ref)
"""
squared_hinge(ŷ, y) = sum((max.(0, 1 .- ŷ .* y)).^2) * 1 // size(y, 2)
"""
dice_coeff_loss(ŷ, y; smooth=1)
Return a loss based on the dice coefficient.
Used in the [V-Net](https://arxiv.org/pdf/1606.04797v1.pdf) image segmentation
architecture.
Similar to the F1_score. Calculated as:
1 - 2*sum(|ŷ .* y| + smooth) / (sum(ŷ.^2) + sum(y.^2) + smooth)
"""
dice_coeff_loss(ŷ, y; smooth=eltype(ŷ)(1.0)) = 1 - (2*sum(y .* ŷ) + smooth) / (sum(y.^2) + sum(ŷ.^2) + smooth)
"""
tversky_loss(ŷ, y; β=0.7)
Return the [Tversky loss](https://arxiv.org/pdf/1706.05721.pdf).
Used with imbalanced data to give more weight to false negatives.
Larger β weigh recall higher than precision (by placing more emphasis on false negatives)
Calculated as:
1 - sum(|y .* ŷ| + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
"""
tversky_loss(ŷ, y; β=eltype(ŷ)(0.7)) = 1 - (sum(y .* ŷ) + 1) / (sum(y .* ŷ + β*(1 .- y) .* ŷ + (1 - β)*y .* (1 .- ŷ)) + 1)
"""
flatten(x::AbstractArray)
Transform (w, h, c, b)-shaped input into (w × h × c, b)-shaped output
by linearizing all values for each element in the batch.
Reshape arbitrarily-shaped input into a matrix-shaped output
preserving the last dimension size.
Equivalent to `reshape(x, :, size(x)[end])`.
"""
function flatten(x::AbstractArray)
return reshape(x, :, size(x)[end])
end
flatten(x::AbstractArray) = reshape(x, :, size(x)[end])
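# Illustrative example (shapes assumed): flatten(rand(28, 28, 1, 32)) has size (784, 32),
# i.e. each 28×28×1 image in the batch of 32 is linearised into a 784-element column.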

View File

@ -4,6 +4,9 @@ nfan(n) = 1, n # A vector is treated as a n×1 matrix
nfan(n_out, n_in) = n_in, n_out # In case of Dense kernels: arranged as matrices
nfan(dims...) = prod(dims[1:end-2]) .* (dims[end-1], dims[end]) # In case of convolution kernels
ofeltype(x, y) = convert(float(eltype(x)), y)
epseltype(x) = eps(float(eltype(x)))
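# Illustrative examples of the two helpers above (values assumed, not from the PR):
#   ofeltype([1f0, 2f0], 0.5)  # 0.5f0, converted to the array's float eltype
#   epseltype([1, 2, 3])       # eps(Float64), since float(Int) is Float64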
"""
glorot_uniform(dims...)

View File

@ -33,8 +33,8 @@ cx = gpu(x)
x = [-1.1491, 0.8619, 0.3127]
y = [1, 1, 0.]
@test Flux.binarycrossentropy.(σ.(x),y) ≈ Array(Flux.binarycrossentropy.(cu(σ.(x)),cu(y)))
@test Flux.logitbinarycrossentropy.(x,y) ≈ Array(Flux.logitbinarycrossentropy.(cu(x),cu(y)))
@test Flux.binarycrossentropy(σ.(x), y) ≈ Flux.binarycrossentropy(cu(σ.(x)), cu(y))
@test Flux.logitbinarycrossentropy(x, y) ≈ Flux.logitbinarycrossentropy(cu(x), cu(y))
xs = rand(5, 5)
ys = Flux.onehotbatch(1:5,1:5)

View File

@ -56,12 +56,12 @@ const ϵ = 1e-7
logŷ, y = randn(3), rand(3)
@testset "binarycrossentropy" begin
@test binarycrossentropy.(σ.(logŷ), y; ϵ=0) ≈ -y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ))
@test binarycrossentropy.(σ.(logŷ), y) ≈ -y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ)))
@test binarycrossentropy(σ.(logŷ), y; ϵ=0) ≈ mean(-y.*log.(σ.(logŷ)) - (1 .- y).*log.(1 .- σ.(logŷ)))
@test binarycrossentropy(σ.(logŷ), y) ≈ mean(-y.*log.(σ.(logŷ) .+ eps.(σ.(logŷ))) - (1 .- y).*log.(1 .- σ.(logŷ) .+ eps.(σ.(logŷ))))
end
@testset "logitbinarycrossentropy" begin
@test logitbinarycrossentropy.(logŷ, y) ≈ binarycrossentropy.(σ.(logŷ), y; ϵ=0)
@test logitbinarycrossentropy(logŷ, y) ≈ binarycrossentropy(σ.(logŷ), y; ϵ=0)
end
y = [1 2 3]
@ -86,28 +86,28 @@ const ϵ = 1e-7
y = [0.1 0.2 0.3]
ŷ = [0.4 0.5 0.6]
@testset "poisson" begin
@test Flux.poisson(ŷ, y) ≈ 0.6278353988097339
@test Flux.poisson(y, y) ≈ 0.5044459776946685
@test Flux.poisson_loss(ŷ, y) ≈ 0.6278353988097339
@test Flux.poisson_loss(y, y) ≈ 0.5044459776946685
end
y = [1.0 0.5 0.3 2.4]
ŷ = [0 1.4 0.5 1.2]
@testset "dice_coeff_loss" begin
@test Flux.dice_coeff_loss(ŷ, y) ≈ 0.2799999999999999
@test Flux.dice_coeff_loss(y, y) ≈ 0.0
@test Flux.dice_coeff_loss(ŷ, y, dims=(1,2)) ≈ 0.2799999999999999
@test Flux.dice_coeff_loss(y, y, dims=(1,2)) ≈ 0.0
end
@testset "tversky_loss" begin
@test Flux.tversky_loss(ŷ, y) ≈ -0.06772009029345383
@test Flux.tversky_loss(ŷ, y, β = 0.8) ≈ -0.09490740740740744
@test Flux.tversky_loss(y, y) ≈ -0.5576923076923075
@test Flux.tversky_loss(ŷ, y, dims=(1,2)) ≈ 0.036175710594315236
@test Flux.tversky_loss(ŷ, y, dims=(1,2), β = 0.8) ≈ 0.06281407035175879
@test Flux.tversky_loss(y, y, dims=(1,2)) ≈ -0.6904761904761902
end
@testset "no spurious promotions" begin
for T in (Float32, Float64)
y = rand(T, 2)
ŷ = rand(T, 2)
for f in (mse, crossentropy, logitcrossentropy, Flux.kldivergence, Flux.hinge, Flux.poisson,
for f in (mse, crossentropy, logitcrossentropy, Flux.kldivergence, Flux.hinge, Flux.poisson_loss,
Flux.mae, Flux.huber_loss, Flux.msle, Flux.squared_hinge, Flux.dice_coeff_loss, Flux.tversky_loss)
fwd, back = Flux.pullback(f, ŷ, y)
@test fwd isa T