Flux.jl/src/utils.jl

# Arrays
"""
    glorot_uniform(dims...)

Return a `Float32` array of size `dims` whose entries are drawn uniformly from
`[-0.5, 0.5)` and then multiplied by `sqrt(24 / sum(dims))`.
"""
function glorot_uniform(dims...)
  scale = sqrt(24.0f0 / sum(dims))
  return (rand(Float32, dims...) .- 0.5f0) .* scale
end

"""
    glorot_normal(dims...)

Return a `Float32` array of size `dims` whose entries are standard-normal
samples multiplied by `sqrt(2 / sum(dims))`.
"""
function glorot_normal(dims...)
  scale = sqrt(2.0f0 / sum(dims))
  return randn(Float32, dims...) .* scale
end

"""
    ones([T::Type,] dims...)

Array of ones of size `dims`. Forwards to `Base.ones`, using `Float32` as the
element type when `T` is not given.
"""
function ones(T::Type, dims...)
  return Base.ones(T, dims...)
end

function ones(dims...)
  return Base.ones(Float32, dims...)
end

"""
    zeros([T::Type,] dims...)

Array of zeros of size `dims`. Forwards to `Base.zeros`, using `Float32` as the
element type when `T` is not given.
"""
function zeros(T::Type, dims...)
  return Base.zeros(T, dims...)
end

function zeros(dims...)
  return Base.zeros(Float32, dims...)
end

"""
    unsqueeze(xs, dim)

Reshape `xs` so that a new dimension of size 1 is inserted at position `dim`.
"""
function unsqueeze(xs, dim)
  sz = size(xs)
  leading, trailing = sz[1:dim-1], sz[dim:end]
  return reshape(xs, (leading..., 1, trailing...))
end

"""
    stack(xs, dim)

Concatenate the arrays in `xs` along a new dimension inserted at `dim`.
"""
function stack(xs, dim)
  expanded = map(x -> unsqueeze(x, dim), xs)
  return cat(expanded...; dims=dim)
end

"""
    unstack(xs, dim)

Return a vector holding a copy of every slice of `xs` along dimension `dim`.
"""
function unstack(xs, dim)
  return map(i -> copy(selectdim(xs, dim, i)), 1:size(xs, dim))
end

"""
    chunk(xs, n)

Split `xs` into consecutive parts of `ceil(length(xs)/n)` elements each (the
last part may be shorter), so at most `n` parts are returned. An empty `xs`
gives an empty collection.

```julia
julia> chunk(1:10, 3)
3-element Array{Array{Int64,1},1}:
 [1, 2, 3, 4]
 [5, 6, 7, 8]
 [9, 10]
```
"""
function chunk(xs, n)
  # For empty input the rounded part length is 0, which `Iterators.partition`
  # rejects; clamp to 1 so that an empty collection comes back instead.
  partlen = max(1, ceil(Int, length(xs) / n))
  return collect(Iterators.partition(xs, partlen))
end

"""
    batchindex(xs, i)

Index tuple selecting position `i` along the last dimension of `xs` and the
full axis of every other dimension.
"""
function batchindex(xs, i)
  leading_axes = Base.front(axes(xs))
  return (leading_axes..., i)
end

"""
    frequencies(xs)

Build a dictionary mapping each distinct element of `xs` to the number of
times it occurs.

```julia
julia> frequencies(['a','b','b'])
Dict{Char,Int64} with 2 entries:
  'b' => 2
  'a' => 1
```
"""
function frequencies(xs)
  counts = Dict{eltype(xs),Int}()
  foreach(xs) do item
    seen = get(counts, item, 0)
    counts[item] = seen + 1
  end
  return counts
end

"""
    head(x::Tuple)

All elements of the tuple `x` except the last one.
"""
function head(x::Tuple)
  return Base.front(x)
end

"""
    squeezebatch(x)

Reshape `x` to its own size with the last dimension removed.
"""
function squeezebatch(x)
  target = head(size(x))
  return reshape(x, target)
end

"""
    batch(xs)

Combine the items of `xs` into one array. When the first item is an array,
the result is `similar` to it with one extra trailing dimension of length
`length(xs)`; otherwise the result is a `Vector{eltype(xs)}`.

```julia
julia> batch([[1,2,3],[4,5,6]])
3×2 Array{Int64,2}:
 1  4
 2  5
 3  6
```
"""
function batch(xs)
  proto = first(xs)
  nitems = length(xs)
  if proto isa AbstractArray
    data = similar(proto, size(proto)..., nitems)
  else
    data = Vector{eltype(xs)}(undef, nitems)
  end
  # Write every item into its own slot along the last dimension.
  for (slot, item) in enumerate(xs)
    data[batchindex(data, slot)...] = item
  end
  return data
end

# Method of `Base.rpad` for vectors: append copies of `p` until the result has
# at least `n` elements. Vectors already longer than `n` are not truncated.
function Base.rpad(v::AbstractVector, n::Integer, p)
  npad = max(n - length(v), 0)
  return vcat(v, fill(p, npad))
end

"""
    batchseq(seqs, pad = nothing, n = maximum(length, seqs))

Take a list of `N` sequences and turn them into a single sequence of `n` items,
where item `i` is the batch of the `i`-th elements of all `N` sequences.
Sequences shorter than `n` are padded with `pad`; elements beyond position `n`
are not included.

```julia
julia> batchseq([[1, 2, 3], [4, 5]], 0)
3-element Array{Array{Int64,1},1}:
 [1, 4]
 [2, 5]
 [3, 0]
```
"""
function batchseq(xs, pad = nothing, n = maximum(length(x) for x in xs))
  padded = [rpad(seq, n, pad) for seq in xs]
  # Elements found at position `i` of every padded sequence.
  column(i) = [padded[j][i] for j = 1:length(padded)]
  return [batch(column(i)) for i = 1:n]
end

# Other

"""
    throttle(f, timeout; leading=true, trailing=false)

Return a function that, when invoked, triggers `f` at most once during
`timeout` seconds.

Normally the throttled function runs `f` as often as it can without ever going
more than once per `timeout` duration. Pass `leading=false` to disable
execution on the leading edge (the call is then deferred until `timeout` has
elapsed), and `trailing=true` to also run the most recent call that arrived
during the waiting period once that period ends.

The throttled function returns the result stored by the last leading-edge or
trailing-edge call of `f` (`nothing` until one has been stored). A call
deferred because of `leading=false` does not update the stored result.
"""
function throttle(f, timeout; leading=true, trailing=false)
  cooldown = true   # true when a new call may start a throttling period
  later = nothing   # deferred call waiting to run, if any
  result = nothing  # last stored return value of `f`

  function throttled(args...; kwargs...)
    # Give the background task a chance to run so `cooldown` is up to date.
    yield()

    if cooldown
      if leading
        result = f(args...; kwargs...)
      else
        later = () -> f(args...; kwargs...)
      end

      cooldown = false
      # After each `timeout`, run the pending call (if any) and wait again;
      # once nothing is pending, the period ends and `cooldown` is re-armed.
      @async try
        while (sleep(timeout); later !== nothing)
          later()
          later = nothing
        end
      finally
        cooldown = true
      end
    elseif trailing
      # Keep only the most recent call made while throttled.
      later = () -> (result = f(args...; kwargs...))
    end

    return result
  end

  return throttled
end

import Base: +, -, *, reshape, size
import Base.Broadcast: broadcasted, Broadcasted, BroadcastStyle

"""
    Zeros()
    Zeros(size...)
    Zeros(Type, size...)

Acts as a stand-in for an array of zeros that can be
used during training which is ignored by the optimisers.

Useful to turn bias off for a forward pass of a layer.

!!! warning
    Zeros acts as a scalar while broadcasting, so does not
    expand dims. Checks for shape compatibility by default.

## Examples

```julia
julia> Flux.Zeros(3,3)
3×3 Flux.Zeros{Bool,2}:
 false  false  false
 false  false  false
 false  false  false

julia> Flux.Zeros(Float32, 3,3)
3×3 Flux.Zeros{Float32,2}:
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0

julia> rand(3,3) .+ Flux.Zeros()
3×3 Array{Float64,2}:
 0.198739  0.490459  0.785386
 0.779074  0.39986   0.66383
 0.854981  0.447292  0.314497

julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros())
Conv((2, 2), 1=>3)
```
"""
struct Zeros{T,N} <: AbstractArray{T,N}
  # Concretely typed so that `size(xs)` is inferable; the tuple length is tied
  # to the dimensionality parameter `N`.
  size::NTuple{N,Int}
end

# Element type defaults to `Bool` when only a size is given.
Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz)
Zeros(sz::Integer...) = Zeros(Bool, sz...)

Base.size(xs::Zeros) = xs.size
Base.axes(xs::Zeros) = Base.OneTo.(size(xs))

Base.IndexStyle(::Type{<:Zeros}) = IndexCartesian()

# Every element reads as `zero(T)`; no storage is consulted.
Base.getindex(xs::Zeros{T,N}, I::Vararg{Int, N}) where {T,N} = zero(T)
# Indexing with a range yields a 1-d `Zeros` with one entry per selected index.
Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} =
  Zeros(T, length(inds))

# `Zeros` is read-only.
Base.setindex(xs::Zeros, args...) =
  error("setindex disallowed on Zeros Array")
Base.setindex!(xs::Zeros, args...) =
  error("setindex! disallowed on Zeros Array")

# Materialise as a dense array of the same size.
Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs))

# Ignore during backwards pass: the forward value is the ordinary `reshape` of
# the `Zeros`, while the pullback discards the incoming gradient and returns
# `nothing`, so nothing is propagated back to `xs` or `dims`.
# NOTE(review): `@adjoint` is not defined in this file (presumably Zygote's
# macro, brought into scope elsewhere) — confirm.
@adjoint reshape(xs::Zeros{T}, dims...) where T =
  reshape(xs, dims...), _ -> nothing

# Define basic ops
#
# Adding or subtracting a `Zeros` on the right returns the left operand as-is.
Base.:+(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a
Base.:-(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a
# A `Zeros` on the left contributes nothing to a sum or difference.
Base.:+(a::Zeros, b::AbstractArray) = b
Base.:-(a::Zeros, b::AbstractArray) = -b
# Any product involving a `Zeros` is `zero` of the left operand.
Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a)
Base.:*(a::Zeros, b::AbstractArray) = zero(a)

# Both operands `Zeros`: each pair of methods above matches with neither being
# more specific, so without these definitions the call is a method ambiguity.
Base.:+(a::Zeros, b::Zeros) = a
Base.:-(a::Zeros, b::Zeros) = a
Base.:*(a::Zeros, b::Zeros) = zero(a)

# Hook into broadcasting API - to allow using as a regular array
#
# Any broadcast involving a `Zeros` is tagged with `ArrayStyle{Zeros}`, which
# also wins over the default array styles, so the `similar`/`copyto!` methods
# below are used to produce the result.
Base.BroadcastStyle(::Type{<:Zeros}) = Broadcast.ArrayStyle{Zeros}()
Broadcast.broadcastable(xs::Zeros) = xs
Base.BroadcastStyle(::Broadcast.ArrayStyle{Zeros}, ::Broadcast.DefaultArrayStyle{N}) where N =
  Broadcast.ArrayStyle{Zeros}()

# The broadcast result is always a dense `Array` with the broadcast axes.
function Base.similar(bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}, ::Type{T}) where T
  similar(Array{T}, axes(bc))
end

Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...)

isZeros(x::Zeros) = true
isZeros(x) = false

# Evaluate the broadcast into `dest`, treating every `Zeros` argument as the
# scalar `zero(eltype)`. The (flattened) broadcast function is applied to all
# arguments, so fused expressions such as `f.(x .+ Zeros())` still apply `f`,
# and it also works when every argument is a `Zeros`.
function Base.copyto!(dest::AbstractArray, bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}})
  flat = Broadcast.flatten(bc)
  args = map(a -> isZeros(a) ? zero(eltype(a)) : a, flat.args)
  return broadcast!(flat.f, dest, args...)
end

"""
    @jit ...

The `@jit` annotation may be placed in front of any code. The macro returns
the expression unchanged, so the annotated code runs exactly as it would
without the annotation.

    @jit f(x) = @jit(x) + @jit(x)

Since compilation happens regardless of the `@jit` macro, it should only
be used for aesthetic purposes, or by recovering Python users.
"""
macro jit(ex)
  return esc(ex)
end
-												pull out tuple utils

											
										
										
											2017-05-01 15:57:51 +00:00
+								# Arrays
-												float32 param initialisers

											
										
										
											2018-09-07 00:25:32 +00:00
+								glorot_uniform(dims...) = (rand(Float32, dims...) .- 0.5f0) .* sqrt(24.0f0/sum(dims))
 								glorot_normal(dims...) = randn(Float32, dims...) .* sqrt(2.0f0/sum(dims))
 								ones(T::Type, dims...) = Base.ones(T, dims...)
 								zeros(T::Type, dims...) = Base.zeros(T, dims...)
 								ones(dims...) = Base.ones(Float32, dims...)
 								zeros(dims...) = Base.zeros(Float32, dims...)
-												better default init

											
										
										
											2016-08-25 16:25:33 +00:00
-												onehot

											
										
										
											2017-09-06 22:58:55 +00:00
+								unsqueeze(xs, dim) = reshape(xs, (size(xs)[1:dim-1]..., 1, size(xs)[dim:end]...))
-												simplify organisation

											
										
										
											2017-08-19 19:52:29 +00:00
-												fixed stack/unstack function - in utils.jl for v1.0

											
										
										
											2019-01-04 01:32:11 +00:00
+								stack(xs, dim) = cat(unsqueeze.(xs, dim)..., dims=dim)
 								unstack(xs, dim) = [copy(selectdim(xs, dim, i)) for i in 1:size(xs, dim)]
-												pull out tuple utils

											
										
										
											2017-05-01 15:57:51 +00:00
-												chunk util

											
										
										
											2017-10-18 16:07:58 +00:00
+								"""
 								    chunk(xs, n)
 								Split `xs` into `n` parts.
 								```julia
 								julia> chunk(1:10, 3)
 -element Array{Array{Int64,1},1}:
 								 [1, 2, 3, 4]
 								 [5, 6, 7, 8]
 								 [9, 10]
 								```
 								"""
 								chunk(xs, n) = collect(Iterators.partition(xs, ceil(Int, length(xs)/n)))
-.0+ updates - indices to axes, Vector init with undef

											
										
										
											2018-10-02 19:39:00 +00:00
+								batchindex(xs, i) = (reverse(Base.tail(reverse(axes(xs))))..., i)
-												batch and batchseq apis

											
										
										
											2017-10-15 22:44:40 +00:00
-												frequencies utility

											
										
										
											2018-02-17 11:19:14 +00:00
+								"""
 								    frequencies(xs)
 								Count the number of times that each element of `xs` appears.
 								```julia
 								julia> frequencies(['a','b','b'])
 								Dict{Char,Int64} with 2 entries:
 								  'b' => 2
 								  'a' => 1
 								```
 								"""
 								function frequencies(xs)
 								  fs = Dict{eltype(xs),Int}()
 								  for x in xs
 								    fs[x] = get(fs, x, 0) + 1
 								  end
 								  return fs
 								end
-												single-batch convolution

											
										
										
											2018-02-28 14:25:32 +00:00
+								head(x::Tuple) = reverse(Base.tail(reverse(x)))
 								squeezebatch(x) = reshape(x, head(size(x)))
-												batching docs

											
										
										
											2017-10-18 15:21:15 +00:00
+								"""
 								  batch(xs)
 								Batch the arrays in `xs` into a single array.
 								```julia
 								julia> batch([[1,2,3],[4,5,6]])
 ×2 Array{Int64,2}:
 4
 5
 6
 								```
 								"""
-												batch and batchseq apis

											
										
										
											2017-10-15 22:44:40 +00:00
+								function batch(xs)
-												batching docs

											
										
										
											2017-10-18 15:21:15 +00:00
+								  data = first(xs) isa AbstractArray ?
 								    similar(first(xs), size(first(xs))..., length(xs)) :
-.0+ updates - indices to axes, Vector init with undef

											
										
										
											2018-10-02 19:39:00 +00:00
+								    Vector{eltype(xs)}(undef, length(xs))
-												batch and batchseq apis

											
										
										
											2017-10-15 22:44:40 +00:00
+								  for (i, x) in enumerate(xs)
 								    data[batchindex(data, i)...] = x
 								  end
 								  return data
 								end
 								Base.rpad(v::AbstractVector, n::Integer, p) = [v; fill(p, max(n - length(v), 0))]
-												batching docs

											
										
										
											2017-10-18 15:21:15 +00:00
+								"""
 								    batchseq(seqs, pad)
 								Take a list of `N` sequences, and turn them into a single sequence where each
 								item is a batch of `N`. Short sequences will be padded by `pad`.
 								```julia
 								julia> batchseq([[1, 2, 3], [4, 5]], 0)
 -element Array{Array{Int64,1},1}:
 								 [1, 4]
 								 [2, 5]
 								 [3, 0]
 								```
 								"""
 								function batchseq(xs, pad = nothing, n = maximum(length(x) for x in xs))
-												batch and batchseq apis

											
										
										
											2017-10-15 22:44:40 +00:00
+								  xs_ = [rpad(x, n, pad) for x in xs]
 								  [batch([xs_[j][i] for j = 1:length(xs_)]) for i = 1:n]
 								end
-												pull out tuple utils

											
										
										
											2017-05-01 15:57:51 +00:00
+								# Other
-												this is no longer specific to training

											
										
										
											2017-05-01 12:46:23 +00:00
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
+								"""
 								Returns a function that when invoked, will only be triggered at most once
 								during `timeout` seconds. Normally, the throttled function will run
 								as much as it can, without ever going more than once per `wait` duration;
 								but if you'd like to disable the execution on the leading edge, pass
 								`leading=false`. To enable execution on the trailing edge, ditto.
 								"""
 								function throttle(f, timeout; leading=true, trailing=false)
 								  cooldown = true
 								  later = nothing
-												closes #127

											
										
										
											2017-12-13 18:24:56 +00:00
+								  result = nothing
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
 								  function throttled(args...; kwargs...)
 								    yield()
 								    if cooldown
 								      if leading
-												closes #127

											
										
										
											2017-12-13 18:24:56 +00:00
+								        result = f(args...; kwargs...)
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
+								      else
 								        later = () -> f(args...; kwargs...)
 								      end
 								      cooldown = false
-												deprecations

											
										
										
											2018-06-20 14:18:07 +00:00
+								      @async try
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
+								        while (sleep(timeout); later != nothing)
 								          later()
 								          later = nothing
 								        end
 								      finally
 								        cooldown = true
 								      end
 								    elseif trailing
-												closes #127

											
										
										
											2017-12-13 18:24:56 +00:00
+								      later = () -> (result = f(args...; kwargs...))
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
+								    end
-												closes #127

											
										
										
											2017-12-13 18:24:56 +00:00
+								    return result
-												remove batching and training

											
										
										
											2017-08-18 00:04:50 +00:00
+								  end
 								end
-												Add jacobian function

											
										
										
											2017-12-08 13:46:12 +00:00
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								import Base: +, -, *, reshape, size
 								import Base.Broadcast: broadcasted, Broadcasted, BroadcastStyle
-												add ZeroType back

											
										
										
											2019-10-08 11:47:36 +00:00
-												ZeroType => Zeros

											
										
										
											2019-10-08 14:32:04 +00:00
+								"""
 								    Zeros()
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								    Zeros(size...)
 								    Zeros(Type, size...)
-												ZeroType => Zeros

											
										
										
											2019-10-08 14:32:04 +00:00
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								Acts as a stand-in for an array of zeros that can be
 								used during training which is ignored by the optimisers.
-												add ZeroType back

											
										
										
											2019-10-08 11:47:36 +00:00
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								Useful to turn bias off for a forward pass of a layer.
 								!!! warning
 								    Zeros acts a scalar while broadcasting, so does not
 								    expand dims. Checks for shape compatibility by default.
-												ZeroType => Zeros

											
										
										
											2019-10-08 14:32:04 +00:00
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								## Examples
-												ZeroType => Zeros

											
										
										
											2019-10-08 14:32:04 +00:00
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								```julia
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								julia> Flux.Zeros(3,3)
 ×3 Flux.Zeros{Bool,2}:
 								 false  false  false
 								 false  false  false
 								 false  false  false
 								julia> Flux.Zeros(Float32, 3,3)
 ×3 Flux.Zeros{Float32,2}:
 .0  0.0  0.0
 .0  0.0  0.0
 .0  0.0  0.0
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								julia> rand(3,3) .+ Flux.Zeros()
 ×3 Array{Float64,2}:
 .198739  0.490459  0.785386
 .779074  0.39986   0.66383
 .854981  0.447292  0.314497
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros())
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								Conv((2, 2), 1=>3)
 								```
 								"""
-												use array to define Zeros

											
										
										
											2019-10-23 14:32:15 +00:00
+								struct Zeros{T,N} <: AbstractArray{T,N}
 								  size::Tuple
 								end
 								Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz)
 								Zeros(sz::Integer...) = Zeros(Bool, sz...)
 								Base.size(xs::Zeros) = xs.size
 								Base.axes(xs::Zeros) = Base.OneTo.(size(xs))
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								Base.IndexStyle(::Type{<:Zeros}) = IndexCartesian()
 								Base.getindex(xs::Zeros{T,N}, I::Vararg{Int, N}) where {T,N} = zero(T)
 								Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} =
 								  Zeros(T, inds.stop)
-												use array to define Zeros

											
										
										
											2019-10-23 14:32:15 +00:00
+								Base.setindex(xs::Zeros, args...) =
 								  error("setindex disallowed on Zeros Array")
 								Base.setindex!(xs::Zeros, args...) =
 								  error("setindex! disallowed on Zeros Array")
 								Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs))
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								# Ignore during backwards pass
-												use array to define Zeros

											
										
										
											2019-10-23 14:32:15 +00:00
+								@adjoint reshape(xs::Zeros{T}, dims...) where T =
 								  reshape(xs, dims...), _ -> nothing
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								# Define basic ops
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								for f in (:+, :-)
-												use array to define Zeros

											
										
										
											2019-10-23 14:32:15 +00:00
+								 @eval $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a
-												make Zeros a dimensionlesss number

											
										
										
											2019-10-22 10:41:27 +00:00
+								end
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								Base.:+(a::Zeros, b::AbstractArray) = b
 								Base.:-(a::Zeros, b::AbstractArray) = -b
-												use array to define Zeros

											
										
										
											2019-10-23 14:32:15 +00:00
+								Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a)
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								Base.:*(a::Zeros, b::AbstractArray) = zero(a)
 								# Hook into broadcasting API - to allow using as a regular array
 								Base.BroadcastStyle(::Type{<:Zeros}) = Broadcast.ArrayStyle{Zeros}()
 								Broadcast.broadcastable(xs::Zeros) = xs
 								Base.BroadcastStyle(::Broadcast.ArrayStyle{Zeros}, ::Broadcast.DefaultArrayStyle{N}) where N =
 								  Broadcast.ArrayStyle{Zeros}()
-												ZeroType => Zeros

											
										
										
											2019-10-08 14:32:04 +00:00
-												hook into bcasting

											
										
										
											2019-11-07 11:23:41 +00:00
+								function Base.similar(bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}, ::Type{T}) where T
 								  similar(Array{T}, axes(bc))
 								end
 								Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...)
 								isZeros(x::Zeros) = true
 								isZeros(x) = false
 								function Base.copyto!(dest::AbstractArray, bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}})
 								  bc = Broadcast.flatten(bc)
 								  i = isZeros(first(bc.args)) ? 2 : 1 # findfirst(!isZeros, bc.args)
 								  dest .= bc.args[i]
 								end
-												add ZeroType back

											
										
										
											2019-10-08 11:47:36 +00:00
-												jit macro

											
										
										
											2018-10-05 13:02:00 +00:00
+								"""
 								    @jit ...
 								The `@jit` annotation can be applied to any code, and the code will be compiled
 								for performance.
 								    @jit f(x) = @jit(x) + @jit(x)
 								Note that compilation happens regardless of the `@jit` macro, so it should only
 								be used for aesthetic purposes, or by recovering Python users.
 								"""
 								macro jit(ex)
 								  esc(ex)
 								end