ZeroType => Zeros

This commit is contained in:
Dhairya Gandhi 2019-10-08 20:02:04 +05:30
parent 95c5845e99
commit 49ea43e711
3 changed files with 38 additions and 26 deletions


@@ -22,7 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -44,17 +44,17 @@ Constructs the convolutional layer with user defined weight and bias arrays.
All other behaviours of the Conv layer apply with regard to data order and
forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
"""
-function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
stride = 1, pad = 0, dilation = 1) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros(size(w, ndims(w))) : b
return Conv(σ, w, b, stride, pad, dilation)
end
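
For context, a minimal sketch of the keyword form documented above, mirroring this commit's own tests (kernel and channel sizes are arbitrary, chosen for illustration):

```julia
using Flux

# Passing the renamed Zeros() stand-in as the bias keyword switches bias off;
# the optimisers leave it untouched during training.
layer = Conv((2, 2), 1 => 3, bias = Flux.Zeros())

x = rand(Float32, 28, 28, 1, 1)  # a WHCN-ordered input batch
y = layer(x)                     # forward pass adds an implicit zero bias
```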
@@ -114,7 +114,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -136,17 +136,17 @@ Constructs the convolutional transpose layer with user defined weight and bias a
All other behaviours of the ConvTranspose layer apply with regard to data order and
forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
"""
-function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
stride = 1, pad = 0, dilation = 1) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros(size(w, ndims(w))) : b
return ConvTranspose(σ, w, b, stride, pad, dilation)
end
@@ -206,7 +206,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -228,17 +228,17 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
"""
-function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
stride = 1, pad = 0, dilation = 1) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros(size(w, ndims(w))) : b
return DepthwiseConv(σ, w, b, stride, pad, dilation)
end
@@ -312,7 +312,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
and a batch of 50 would be a `100×100×3×50` array.
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -334,17 +334,17 @@ Constructs the standard cross convolutional layer with user defined weight and b
arrays. All other behaviours of the CrossCor layer apply with regard to data order and
forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
layer.
Takes the keyword arguments `pad`, `stride` and `dilation`.
"""
-function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
stride = 1, pad = 0, dilation = 1) where {T,N}
stride = expand(Val(N-2), stride)
pad = expand(Val(2*(N-2)), pad)
dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros(size(w, ndims(w))) : b
return CrossCor(σ, w, b, stride, pad, dilation)
end
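
As a sketch of the direct-constructor path changed in each of the layers above (the weight array's shape is an assumption, chosen for illustration), passing `nothing` for the bias is now normalised to a `Zeros` sized to the output channel dimension:

```julia
using Flux

w = randn(Float32, 2, 2, 1, 3)   # 2×2 kernel, 1 input channel, 3 output channels
layer = Conv(w, nothing)         # nothing is converted to a Zeros internally
layer2 = Conv(w, Flux.Zeros())   # equivalent: pass the stand-in directly
```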


@@ -141,16 +141,28 @@ end
import Base: +, reshape, size
-struct ZeroType{T} <: Number
-  size::T
+"""
+    Zeros()
+    Zeros(T, a::Union{Colon, Int}...)
+
+Acts as a stand-in for an array of zeros that can be used during training which is
+ignored by the optimisers.
+"""
+struct Zeros{T} <: Number
+  size::Tuple
end
-+(a::Number, ::ZeroType) = a
-+(::ZeroType, a::Number) = a
-size(xs::ZeroType) = xs.size
-reshape(::ZeroType, args...) = ZeroType(args)
-@adjoint reshape(xs::ZeroType, dims...) =
-  ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...)
+Zeros(::Type{T}, sz...) where T = Zeros{T}(sz)
+Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...)
++(a::Number, ::Zeros) = a
++(::Zeros, a::Number) = a
+size(xs::Zeros) = xs.size
+reshape(z::Zeros{T}, args...) where T = Zeros(T, args...)
+@adjoint reshape(xs::Zeros{T}, dims...) where T =
+  Zeros(T, dims...), Δ -> (Zeros(T, size(xs)...), map(_ -> nothing, dims)...)
"""
@jit ...
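
The behaviour these definitions give can be summarised in a short sketch (assuming only the methods shown above):

```julia
using Flux

z = Flux.Zeros(Float32, 3)   # stand-in for a length-3 vector of zeros
@assert 1.5f0 + z == 1.5f0   # addition with a Number is the identity on the number
@assert size(z) == (3,)      # size is just the stored tuple
z2 = reshape(z, 3, 1)        # reshape builds a fresh Zeros with the new size
@assert size(z2) == (3, 1)
```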


@@ -28,13 +28,13 @@ end
op = bias(ip)
@test sum(op) == prod(size(op))
-bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,)))
+bias = Conv((2,2), 1=>3, bias = Flux.Zeros())
op = bias(ip)
@test sum(op) === 0.f0
# Train w/o bias and make sure no convergence happens
# when only bias can be converged
-bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,)));
+bias = Conv((2, 2), 1=>3, bias = Flux.Zeros());
ip = zeros(Float32, 28,28,1,1)
op = zeros(Float32, 27,27,3,1) .+ 2.f0
opt = Descent()
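
The test excerpt cuts off at the optimiser; a hedged sketch of how the no-convergence check could continue (the loss function and iteration count are assumptions, not the test's actual code):

```julia
# With the input all zeros and the bias fixed at Zeros, the layer's output is
# stuck at zero, so descent can never reach the all-2 target.
loss(x, y) = Flux.mse(bias(x), y)
for _ in 1:10
  Flux.train!(loss, Flux.params(bias), [(ip, op)], opt)
end
@test loss(ip, op) ≈ 4.f0  # MSE stays at (0 - 2)^2 == 4
```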