diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index 8b725bfb..5f2ab3ce 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -65,3 +65,15 @@ AlphaDropout
 LayerNorm
 GroupNorm
 ```
+
+## Cost Functions
+```@docs
+mse
+crossentropy
+logitcrossentropy
+binarycrossentropy
+logitbinarycrossentropy
+kldivergence
+poisson
+hinge
+```
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 870a6cdf..159a8385 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -84,3 +84,29 @@ function normalise(x::AbstractArray; dims=1)
   σ′ = std(x, dims = dims, mean = μ′, corrected=false)
   return (x .- μ′) ./ σ′
 end
+
+"""
+    kldivergence(ŷ, y)
+Kullback–Leibler divergence is a measure of how much one probability distribution differs from another.
+It is always non-negative, and zero only when both distributions are equal everywhere.
+[KL Divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence).
+"""
+function kldivergence(ŷ, y)
+  entropy = sum(y .* log.(y)) * 1 // size(y, 2)
+  cross_entropy = crossentropy(ŷ, y)
+  return entropy + cross_entropy
+end
+
+"""
+    poisson(ŷ, y)
+The Poisson loss measures how the predicted distribution diverges from the expected distribution.
+[Poisson Loss](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson).
+"""
+poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y, 2)
+
+"""
+    hinge(ŷ, y)
+Measures the loss given the prediction `ŷ` and true labels `y` (containing 1 or -1).
+[Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss).
+"""
+hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) * 1 // size(y, 2)
diff --git a/test/layers/stateless.jl b/test/layers/stateless.jl
index 9e01af07..7cb8ed2e 100644
--- a/test/layers/stateless.jl
+++ b/test/layers/stateless.jl
@@ -49,12 +49,33 @@ const ϵ = 1e-7
   @testset "logitbinarycrossentropy" begin
     @test logitbinarycrossentropy.(logŷ, y) ≈ binarycrossentropy.(σ.(logŷ), y; ϵ=0)
   end
-
+
+  y = [1 2 3]
+  y1 = [4.0 5.0 6.0]
+  @testset "kldivergence" begin
+    @test Flux.kldivergence(y, y1) ≈ 4.761838062403337
+    @test Flux.kldivergence(y, y) ≈ 0
+  end
+
+  y = [1 2 3 4]
+  y1 = [5.0 6.0 7.0 8.0]
+  @testset "hinge" begin
+    @test Flux.hinge(y, y1) ≈ 0
+    @test Flux.hinge(y, 0.5 .* y) ≈ 0.125
+  end
+
+  y = [0.1 0.2 0.3]
+  y1 = [0.4 0.5 0.6]
+  @testset "poisson" begin
+    @test Flux.poisson(y, y1) ≈ 1.0160455586700767
+    @test Flux.poisson(y, y) ≈ 0.5044459776946685
+  end
+
   @testset "no spurious promotions" begin
     for T in (Float32, Float64)
       y = rand(T, 2)
       ŷ = rand(T, 2)
-      for f in (mse, crossentropy, logitcrossentropy)
+      for f in (mse, crossentropy, logitcrossentropy, Flux.kldivergence, Flux.hinge, Flux.poisson)
         fwd, back = Flux.pullback(f, ŷ, y)
         @test fwd isa T
         @test eltype(back(one(T))[1]) == T
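
A few reviewer-style sanity checks follow. First, `kldivergence(ŷ, y)` as defined above reduces to the mean over columns of `sum(y .* log.(y ./ ŷ))`; this minimal sketch (assuming the patch is applied; the functions are not exported, hence the `Flux.` prefix) reproduces the test value:

```julia
using Flux

ŷ = [1 2 3]          # prediction, as in the new tests
y = [4.0 5.0 6.0]    # target

# entropy term + cross-entropy term == mean-per-column KL(y ‖ ŷ)
manual = sum(y .* log.(y ./ ŷ)) * 1 // size(y, 2)

@assert Flux.kldivergence(ŷ, y) ≈ manual   # ≈ 4.761838062403337
@assert Flux.kldivergence(y, y) ≈ 0        # zero when the distributions coincide
```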
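
Similarly, the `poisson` test value falls straight out of the one-line definition; a sketch under the same assumptions:

```julia
using Flux

ŷ = [0.1 0.2 0.3]
y = [0.4 0.5 0.6]

# the definition, written out by hand
manual = sum(ŷ .- y .* log.(ŷ)) * 1 // size(y, 2)

@assert Flux.poisson(ŷ, y) ≈ manual   # ≈ 1.0160455586700767
```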
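
`hinge` expects labels in {-1, 1}: any prediction on the correct side of the decision boundary with margin at least 1 contributes nothing to the loss. A hypothetical usage sketch (the `ŷ_good`/`ŷ_bad` names are illustrative, not from the patch):

```julia
using Flux

y = [1 -1 1 -1]                  # true labels in {-1, 1}
ŷ_good = [2.0 -2.0 1.5 -1.0]     # correct side, margin ≥ 1 everywhere
ŷ_bad  = [0.5 -0.5 -1.0 1.0]     # small or wrong-signed margins

@assert Flux.hinge(ŷ_good, y) == 0
@assert Flux.hinge(ŷ_bad, y) ≈ 1.25   # sum(max.(0, 1 .- ŷ .* y)) / 4
```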
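
Finally, the `* 1 // size(y, 2)` factor shared by all three definitions multiplies by a `Rational{Int}`; a `Float32` sum times a `Rational{Int}` stays `Float32`, which is the property the extended "no spurious promotions" testset exercises. A minimal sketch of that property:

```julia
using Flux

ŷ, y = rand(Float32, 3, 4), rand(Float32, 3, 4)

for f in (Flux.kldivergence, Flux.poisson, Flux.hinge)
    @assert f(ŷ, y) isa Float32   # the Rational factor does not widen the eltype
end
```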