ZeroType => Zeros

This commit is contained in:
Dhairya Gandhi 2019-10-08 20:02:04 +05:30
parent 95c5845e99
commit 49ea43e711
3 changed files with 38 additions and 26 deletions

View File

@@ -22,7 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -44,17 +44,17 @@ Constructs the convolutional layer with user defined weight and bias arrays.
 All other behaviours of the Conv layer apply with regard to data order and
 forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
   return Conv(σ, w, b, stride, pad, dilation)
 end
@@ -114,7 +114,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
 be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
 Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -136,17 +136,17 @@ Constructs the convolutional transpose layer with user defined weight and bias a
 All other behaviours of the ConvTranspose layer apply with regard to data order and
 forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
@@ -206,7 +206,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
 be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
 Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -228,17 +228,17 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
 All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
 forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
@@ -312,7 +312,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
-Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
+Setting `bias` to `Flux.Zeros()` will switch bias off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
@@ -334,17 +334,17 @@ Constructs the standard cross convolutional layer with user defined weight and b
 arrays. All other behaviours of the CrossCor layer apply with regard to data order and
 forward pass.
-Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
+Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the
 layer.
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
+function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
-  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
+  b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
   return CrossCor(σ, w, b, stride, pad, dilation)
 end

View File

@@ -141,16 +141,28 @@ end
 import Base: +, reshape, size

-struct ZeroType{T} <: Number
-  size::T
+"""
+    Zeros()
+    Zeros(T, a::Union{Colon, Int}...)
+
+Acts as a stand-in for an array of zeros that can be used during training which is
+ignored by the optimisers.
+"""
+struct Zeros{T} <: Number
+  size::Tuple
 end

-+(a::Number, ::ZeroType) = a
-+(::ZeroType, a::Number) = a
-size(xs::ZeroType) = xs.size
-reshape(::ZeroType, args...) = ZeroType(args)
-@adjoint reshape(xs::ZeroType, dims...) =
-  ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...)
+Zeros(::Type{T}, sz...) where T = Zeros{T}(sz)
+Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...)
+
++(a::Number, ::Zeros) = a
++(::Zeros, a::Number) = a
+
+size(xs::Zeros) = xs.size
+reshape(z::Zeros{T}, args...) where T = Zeros(T, args...)
+@adjoint reshape(xs::Zeros{T}, dims...) where T =
+  Zeros(T, dims...), Δ -> (Zeros(T, size(xs)...), map(_ -> nothing, dims)...)

 """
 @jit ...

View File

@@ -28,13 +28,13 @@ end
 op = bias(ip)
 @test sum(op) == prod(size(op))

-bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,)))
+bias = Conv((2,2), 1=>3, bias = Flux.Zeros())
 op = bias(ip)
 @test sum(op) === 0.f0

 # Train w/o bias and make sure no convergence happens
 # when only bias can be converged
-bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,)));
+bias = Conv((2, 2), 1=>3, bias = Flux.Zeros());
 ip = zeros(Float32, 28,28,1,1)
 op = zeros(Float32, 27,27,3,1) .+ 2.f0
 opt = Descent()