From 2cb94981a0176f070eb2dec31c00ef125613ce3f Mon Sep 17 00:00:00 2001
From: Mike J Innes <mike.j.innes@gmail.com>
Date: Fri, 27 Oct 2017 12:05:37 +0100
Subject: [PATCH 1/2] gpu-ready log

---
 src/Flux.jl             |  1 +
 src/layers/stateless.jl |  4 +--
 src/numeric.jl          | 80 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 src/numeric.jl

diff --git a/src/Flux.jl b/src/Flux.jl
index acefff19..ce3861e5 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -21,6 +21,7 @@ include("optimise/Optimise.jl")
 using .Optimise
 
 include("utils.jl")
+include("numeric.jl")
 include("onehot.jl")
 include("treelike.jl")
 
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 3931c216..56d18349 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -3,12 +3,12 @@
 mse(ŷ, y) = sum((ŷ .- y).^2)/length(y)
 
 crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat) =
-  -sum(y .* log.(ŷ)) / size(y, 2)
+  -sum(y .* log_fast.(ŷ)) / size(y, 2)
 
 @deprecate logloss(x, y) crossentropy(x, y)
 
 function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat)
   logŷ = logŷ .- maximum(logŷ, 1)
-  ypred = logŷ .- log.(sum(exp.(logŷ), 1))
+  ypred = logŷ .- log_fast.(sum(exp.(logŷ), 1))
   -sum(y .* ypred) / size(y, 2)
 end
diff --git a/src/numeric.jl b/src/numeric.jl
new file mode 100644
index 00000000..9c444043
--- /dev/null
+++ b/src/numeric.jl
@@ -0,0 +1,80 @@
+using Base.Math: @horner, significand_bits, exponent_raw_max, exponent_bias
+
+if VERSION < v"0.7.0-DEV.1430"
+    using Base.Math.fpinttype
+else
+    using Base.uinttype
+end
+
+# log_fast from
+# https://github.com/musm/SLEEF.jl/blob/c9dcd2eb090d69ec40790f19798c5fef2aba2616/src/log.jl
+
+const MLN2  = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 # log(2)
+
+@inline float2integer(d::Float64) = (reinterpret(Int64, d) >> significand_bits(Float64)) % Int
+@inline float2integer(d::Float32) = (reinterpret(Int32, d) >> significand_bits(Float32)) % Int
+
+@inline function ilogb2k(d::T) where {T<:Union{Float32,Float64}}
+    (float2integer(d) & exponent_raw_max(T)) - exponent_bias(T)
+end
+
+@inline function ldexp3k(x::T, e::Int) where {T<:Union{Float32,Float64}}
+    if VERSION < v"0.7.0-DEV.1430"
+        reinterpret(T, reinterpret(Unsigned, x) + (Int64(e) << significand_bits(T)) % fpinttype(T))
+    else
+        reinterpret(T, reinterpret(Unsigned, x) + (Int64(e) << significand_bits(T)) % uinttype(T))
+    end
+end
+
+"""
+    log_fast(x)
+Compute the natural logarithm of `x`. The inverse of the natural logarithm is
+the natural expoenential function `exp(x)`
+"""
+function log_fast end
+
+let
+global log_fast
+
+c8d = 0.153487338491425068243146
+c7d = 0.152519917006351951593857
+c6d = 0.181863266251982985677316
+c5d = 0.222221366518767365905163
+c4d = 0.285714294746548025383248
+c3d = 0.399999999950799600689777
+c2d = 0.6666666666667778740063
+c1d = 2.0
+
+c5f = 0.2392828464508056640625f0
+c4f = 0.28518211841583251953125f0
+c3f = 0.400005877017974853515625f0
+c2f = 0.666666686534881591796875f0
+c1f = 2f0
+
+global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
+
+function log_fast(d::T) where {T<:Union{Float32,Float64}}
+    o = d < realmin(T)
+    o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32))
+
+    e = ilogb2k(d * T(1.0/0.75))
+    m = ldexp3k(d, -e)
+    o && (e -= 64)
+
+    x  = (m - 1) / (m + 1)
+    x2 = x * x
+
+    t = log_fast_kernel(x2)
+
+    x = x * t + T(MLN2) * e
+
+    isinf(d) && (x = T(Inf))
+    (d < 0 || isnan(d)) && (x = T(NaN))
+    d == 0 && (x = -T(Inf))
+
+    return x
+end
+end
+
+log_fast(x::Union{Int32,Int64}) = log_fast(float(x))

From e0657d93ecccf1b1ac924a42909a0c79b9433df4 Mon Sep 17 00:00:00 2001
From: Mike J Innes <mike.j.innes@gmail.com>
Date: Thu, 9 Nov 2017 15:03:57 +0000
Subject: [PATCH 2/2] mv numeric.jl to nnlib

---
 src/Flux.jl             |  1 -
 src/layers/stateless.jl |  2 ++
 src/numeric.jl          | 80 -----------------------------------------
 3 files changed, 2 insertions(+), 81 deletions(-)
 delete mode 100644 src/numeric.jl

diff --git a/src/Flux.jl b/src/Flux.jl
index ce3861e5..acefff19 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -21,7 +21,6 @@ include("optimise/Optimise.jl")
 using .Optimise
 
 include("utils.jl")
-include("numeric.jl")
 include("onehot.jl")
 include("treelike.jl")
 
diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl
index 56d18349..834068aa 100644
--- a/src/layers/stateless.jl
+++ b/src/layers/stateless.jl
@@ -1,3 +1,5 @@
+using NNlib: log_fast
+
 # Cost functions
 
 mse(ŷ, y) = sum((ŷ .- y).^2)/length(y)
diff --git a/src/numeric.jl b/src/numeric.jl
deleted file mode 100644
index 9c444043..00000000
--- a/src/numeric.jl
+++ /dev/null
@@ -1,80 +0,0 @@
-using Base.Math: @horner, significand_bits, exponent_raw_max, exponent_bias
-
-if VERSION < v"0.7.0-DEV.1430"
-    using Base.Math.fpinttype
-else
-    using Base.uinttype
-end
-
-# log_fast from
-# https://github.com/musm/SLEEF.jl/blob/c9dcd2eb090d69ec40790f19798c5fef2aba2616/src/log.jl
-
-const MLN2  = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01 # log(2)
-
-@inline float2integer(d::Float64) = (reinterpret(Int64, d) >> significand_bits(Float64)) % Int
-@inline float2integer(d::Float32) = (reinterpret(Int32, d) >> significand_bits(Float32)) % Int
-
-@inline function ilogb2k(d::T) where {T<:Union{Float32,Float64}}
-    (float2integer(d) & exponent_raw_max(T)) - exponent_bias(T)
-end
-
-@inline function ldexp3k(x::T, e::Int) where {T<:Union{Float32,Float64}}
-    if VERSION < v"0.7.0-DEV.1430"
-        reinterpret(T, reinterpret(Unsigned, x) + (Int64(e) << significand_bits(T)) % fpinttype(T))
-    else
-        reinterpret(T, reinterpret(Unsigned, x) + (Int64(e) << significand_bits(T)) % uinttype(T))
-    end
-end
-
-"""
-    log_fast(x)
-Compute the natural logarithm of `x`. The inverse of the natural logarithm is
-the natural expoenential function `exp(x)`
-"""
-function log_fast end
-
-let
-global log_fast
-
-c8d = 0.153487338491425068243146
-c7d = 0.152519917006351951593857
-c6d = 0.181863266251982985677316
-c5d = 0.222221366518767365905163
-c4d = 0.285714294746548025383248
-c3d = 0.399999999950799600689777
-c2d = 0.6666666666667778740063
-c1d = 2.0
-
-c5f = 0.2392828464508056640625f0
-c4f = 0.28518211841583251953125f0
-c3f = 0.400005877017974853515625f0
-c2f = 0.666666686534881591796875f0
-c1f = 2f0
-
-global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
-global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
-
-function log_fast(d::T) where {T<:Union{Float32,Float64}}
-    o = d < realmin(T)
-    o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32))
-
-    e = ilogb2k(d * T(1.0/0.75))
-    m = ldexp3k(d, -e)
-    o && (e -= 64)
-
-    x  = (m - 1) / (m + 1)
-    x2 = x * x
-
-    t = log_fast_kernel(x2)
-
-    x = x * t + T(MLN2) * e
-
-    isinf(d) && (x = T(Inf))
-    (d < 0 || isnan(d)) && (x = T(NaN))
-    d == 0 && (x = -T(Inf))
-
-    return x
-end
-end
-
-log_fast(x::Union{Int32,Int64}) = log_fast(float(x))