ZeroType => Zeros
This commit is contained in:
parent
95c5845e99
commit
49ea43e711
|
@ -22,7 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
|
|||
and a batch of 50 would be a `100×100×3×50` array.
|
||||
|
||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
||||
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
|
@ -44,17 +44,17 @@ Constructs the convolutional layer with user defined weight and bias arrays.
|
|||
All other behaviours of the Conv layer apply with regard to data order and
|
||||
forward pass.
|
||||
|
||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
||||
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
"""
|
||||
function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
              stride = 1, pad = 0, dilation = 1) where {T,N}
  # Normalise the keyword arguments through `expand`; `N - 2` is used for
  # stride/dilation and twice that for padding.
  stride = expand(Val(N-2), stride)
  pad = expand(Val(2*(N-2)), pad)
  dilation = expand(Val(N-2), dilation)
  # `nothing` means "no bias": substitute a `Zeros` placeholder whose length is
  # the size of the last dimension of `w`. The length is passed as an integer
  # dimension (matching `Zeros(sz::Union{Integer, Colon}...)`), not wrapped in
  # a tuple.
  bias = b === nothing ? Zeros(size(w, ndims(w))) : b
  return Conv(σ, w, bias, stride, pad, dilation)
end
|
||||
|
||||
|
@ -114,7 +114,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
|
|||
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
||||
|
||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
||||
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
|
@ -136,17 +136,17 @@ Constructs the convolutional transpose layer with user defined weight and bias a
|
|||
All other behaviours of the ConvTranspose layer apply with regard to data order and
|
||||
forward pass.
|
||||
|
||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
||||
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
"""
|
||||
function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                       stride = 1, pad = 0, dilation = 1) where {T,N}
  # Normalise the keyword arguments through `expand`; `N - 2` is used for
  # stride/dilation and twice that for padding.
  stride = expand(Val(N-2), stride)
  pad = expand(Val(2*(N-2)), pad)
  dilation = expand(Val(N-2), dilation)
  # `nothing` means "no bias": substitute a `Zeros` placeholder whose length is
  # the size of the last dimension of `w`. The length is passed as an integer
  # dimension (matching `Zeros(sz::Union{Integer, Colon}...)`), not wrapped in
  # a tuple.
  bias = b === nothing ? Zeros(size(w, ndims(w))) : b
  return ConvTranspose(σ, w, bias, stride, pad, dilation)
end
|
||||
|
||||
|
@ -206,7 +206,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
|
|||
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
||||
|
||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
||||
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
|
@ -228,17 +228,17 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
|
|||
All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
|
||||
forward pass.
|
||||
|
||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
||||
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
"""
|
||||
function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                       stride = 1, pad = 0, dilation = 1) where {T,N}
  # Normalise the keyword arguments through `expand`; `N - 2` is used for
  # stride/dilation and twice that for padding.
  stride = expand(Val(N-2), stride)
  pad = expand(Val(2*(N-2)), pad)
  dilation = expand(Val(N-2), dilation)
  # `nothing` means "no bias": substitute a `Zeros` placeholder whose length is
  # the size of the last dimension of `w`. The length is passed as an integer
  # dimension (matching `Zeros(sz::Union{Integer, Colon}...)`), not wrapped in
  # a tuple.
  bias = b === nothing ? Zeros(size(w, ndims(w))) : b
  return DepthwiseConv(σ, w, bias, stride, pad, dilation)
end
|
||||
|
||||
|
@ -312,7 +312,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
|
|||
and a batch of 50 would be a `100×100×3×50` array.
|
||||
|
||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
||||
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
|
@ -334,17 +334,17 @@ Constructs the standard cross convolutional layer with user defined weight and b
|
|||
arrays. All other behaviours of the CrossCor layer apply with regard to data order and
|
||||
forward pass.
|
||||
|
||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
||||
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||
layer.
|
||||
|
||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||
"""
|
||||
function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
                  stride = 1, pad = 0, dilation = 1) where {T,N}
  # Normalise the keyword arguments through `expand`; `N - 2` is used for
  # stride/dilation and twice that for padding.
  stride = expand(Val(N-2), stride)
  pad = expand(Val(2*(N-2)), pad)
  dilation = expand(Val(N-2), dilation)
  # `nothing` means "no bias": substitute a `Zeros` placeholder whose length is
  # the size of the last dimension of `w`. The length is passed as an integer
  # dimension (matching `Zeros(sz::Union{Integer, Colon}...)`), not wrapped in
  # a tuple.
  bias = b === nothing ? Zeros(size(w, ndims(w))) : b
  return CrossCor(σ, w, bias, stride, pad, dilation)
end
|
||||
|
||||
|
|
28
src/utils.jl
28
src/utils.jl
|
@ -141,16 +141,28 @@ end
|
|||
|
||||
import Base: +, reshape, size
|
||||
|
||||
"""
    Zeros()
    Zeros(dims::Union{Integer, Colon}...)
    Zeros(dims::Tuple)
    Zeros(T, dims...)
    Zeros(T, dims::Tuple)

Acts as a stand-in for an array of zeros that can be used during training which is
ignored by the optimisers.

The object only records the requested dimensions in its `size` field; no array is
allocated. When no element type `T` is given, `Bool` is used. Adding a `Zeros` to a
number returns that number unchanged, and `reshape` returns a new `Zeros` carrying
the requested dimensions.
"""
struct Zeros{T} <: Number
  size::Tuple
end

# Dimensions may be given either as separate arguments or as one tuple; both
# spellings store the same `size`.
Zeros(::Type{T}, sz...) where T = Zeros{T}(sz)
Zeros(::Type{T}, sz::Tuple) where T = Zeros{T}(sz)
Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...)
Zeros(sz::Tuple) = Zeros(Bool, sz)

+(a::Number, ::Zeros) = a
+(::Zeros, a::Number) = a
# Without this method `Zeros + Zeros` is ambiguous between the two methods above.
+(a::Zeros, ::Zeros) = a

size(xs::Zeros) = xs.size

# Reshaping never touches data: build a fresh placeholder with the new dimensions,
# keeping the element type parameter.
reshape(z::Zeros{T}, args...) where T = Zeros(T, args...)
|
||||
|
||||
@adjoint function reshape(xs::Zeros{T}, dims...) where T
  # Pullback: the cotangent for `xs` is a `Zeros` with the size of `xs`, and each
  # entry of `dims` receives `nothing`; the incoming Δ is not used.
  reshape_back(Δ) = (Zeros(T, size(xs)...), map(_ -> nothing, dims)...)
  return Zeros(T, dims...), reshape_back
end
|
||||
|
||||
"""
|
||||
@jit ...
|
||||
|
|
|
@ -28,13 +28,13 @@ end
|
|||
op = bias(ip)
|
||||
@test sum(op) == prod(size(op))
|
||||
|
||||
bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,)))
|
||||
bias = Conv((2,2), 1=>3, bias = Flux.Zeros())
|
||||
op = bias(ip)
|
||||
@test sum(op) === 0.f0
|
||||
|
||||
# Train w/o bias and make sure no convergence happens
|
||||
# when only bias can be converged
|
||||
bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,)));
|
||||
bias = Conv((2, 2), 1=>3, bias = Flux.Zeros());
|
||||
ip = zeros(Float32, 28,28,1,1)
|
||||
op = zeros(Float32, 27,27,3,1) .+ 2.f0
|
||||
opt = Descent()
|
||||
|
|
Loading…
Reference in New Issue