ZeroType => Zeros
This commit is contained in:
parent
95c5845e99
commit
49ea43e711
@ -22,7 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
|
|||||||
and a batch of 50 would be a `100×100×3×50` array.
|
and a batch of 50 would be a `100×100×3×50` array.
|
||||||
|
|
||||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
@ -44,17 +44,17 @@ Constructs the convolutional layer with user defined weight and bias arrays.
|
|||||||
All other behaviours of the Conv layer apply with regard to data order and
|
All other behaviours of the Conv layer apply with regard to data order and
|
||||||
forward pass.
|
forward pass.
|
||||||
|
|
||||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
"""
|
"""
|
||||||
function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
|
function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
|
||||||
stride = 1, pad = 0, dilation = 1) where {T,N}
|
stride = 1, pad = 0, dilation = 1) where {T,N}
|
||||||
stride = expand(Val(N-2), stride)
|
stride = expand(Val(N-2), stride)
|
||||||
pad = expand(Val(2*(N-2)), pad)
|
pad = expand(Val(2*(N-2)), pad)
|
||||||
dilation = expand(Val(N-2), dilation)
|
dilation = expand(Val(N-2), dilation)
|
||||||
b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
|
b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
|
||||||
return Conv(σ, w, b, stride, pad, dilation)
|
return Conv(σ, w, b, stride, pad, dilation)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -114,7 +114,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
|
|||||||
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
||||||
|
|
||||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
@ -136,17 +136,17 @@ Constructs the convolutional transpose layer with user defined weight and bias a
|
|||||||
All other behaviours of the ConvTranspose layer apply with regard to data order and
|
All other behaviours of the ConvTranspose layer apply with regard to data order and
|
||||||
forward pass.
|
forward pass.
|
||||||
|
|
||||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
"""
|
"""
|
||||||
function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
|
function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
|
||||||
stride = 1, pad = 0, dilation = 1) where {T,N}
|
stride = 1, pad = 0, dilation = 1) where {T,N}
|
||||||
stride = expand(Val(N-2), stride)
|
stride = expand(Val(N-2), stride)
|
||||||
pad = expand(Val(2*(N-2)), pad)
|
pad = expand(Val(2*(N-2)), pad)
|
||||||
dilation = expand(Val(N-2), dilation)
|
dilation = expand(Val(N-2), dilation)
|
||||||
b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
|
b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
|
||||||
return ConvTranspose(σ, w, b, stride, pad, dilation)
|
return ConvTranspose(σ, w, b, stride, pad, dilation)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -206,7 +206,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would
|
|||||||
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array.
|
||||||
|
|
||||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
@ -228,17 +228,17 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
|
|||||||
All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
|
All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
|
||||||
forward pass.
|
forward pass.
|
||||||
|
|
||||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
"""
|
"""
|
||||||
function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
|
function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
|
||||||
stride = 1, pad = 0, dilation = 1) where {T,N}
|
stride = 1, pad = 0, dilation = 1) where {T,N}
|
||||||
stride = expand(Val(N-2), stride)
|
stride = expand(Val(N-2), stride)
|
||||||
pad = expand(Val(2*(N-2)), pad)
|
pad = expand(Val(2*(N-2)), pad)
|
||||||
dilation = expand(Val(N-2), dilation)
|
dilation = expand(Val(N-2), dilation)
|
||||||
b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
|
b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
|
||||||
return DepthwiseConv(σ, w, b, stride, pad, dilation)
|
return DepthwiseConv(σ, w, b, stride, pad, dilation)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -312,7 +312,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array,
|
|||||||
and a batch of 50 would be a `100×100×3×50` array.
|
and a batch of 50 would be a `100×100×3×50` array.
|
||||||
|
|
||||||
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
|
||||||
Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the
|
Setting `bias` to `Flux.Zeros()` will switch bias off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
@ -334,17 +334,17 @@ Constructs the standard cross convolutional layer with user defined weight and b
|
|||||||
arrays. All other behaviours of the CrossCor layer apply with regard to data order and
|
arrays. All other behaviours of the CrossCor layer apply with regard to data order and
|
||||||
forward pass.
|
forward pass.
|
||||||
|
|
||||||
Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the
|
Setting `bias` to `nothing` or `Flux.Zeros()` will switch `bias` off for the
|
||||||
layer.
|
layer.
|
||||||
|
|
||||||
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
Takes the keyword arguments `pad`, `stride` and `dilation`.
|
||||||
"""
|
"""
|
||||||
function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
|
function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity;
|
||||||
stride = 1, pad = 0, dilation = 1) where {T,N}
|
stride = 1, pad = 0, dilation = 1) where {T,N}
|
||||||
stride = expand(Val(N-2), stride)
|
stride = expand(Val(N-2), stride)
|
||||||
pad = expand(Val(2*(N-2)), pad)
|
pad = expand(Val(2*(N-2)), pad)
|
||||||
dilation = expand(Val(N-2), dilation)
|
dilation = expand(Val(N-2), dilation)
|
||||||
b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
|
b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b
|
||||||
return CrossCor(σ, w, b, stride, pad, dilation)
|
return CrossCor(σ, w, b, stride, pad, dilation)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
28
src/utils.jl
28
src/utils.jl
@ -141,16 +141,28 @@ end
|
|||||||
|
|
||||||
import Base: +, reshape, size
|
import Base: +, reshape, size
|
||||||
|
|
||||||
struct ZeroType{T} <: Number
|
"""
|
||||||
size::T
|
Zeros()
|
||||||
|
Zeros(T, a::Union{Colon, Int}...)
|
||||||
|
|
||||||
|
Acts as a stand-in for an array of zeros that can be used during training which is
|
||||||
|
ignored by the optimisers.
|
||||||
|
"""
|
||||||
|
struct Zeros{T} <: Number
|
||||||
|
size::Tuple
|
||||||
end
|
end
|
||||||
|
|
||||||
+(a::Number, ::ZeroType) = a
|
# Build a `Zeros` placeholder with element type `T`; the trailing arguments are
# collected into the tuple stored in the `size` field.
function Zeros(::Type{T}, sz...) where {T}
    return Zeros{T}(sz)
end
|
||||||
+(::ZeroType, a::Number) = a
|
# Convenience constructors that default the element type to `Bool`.
# The tuple method keeps call sites that pass the size as a single tuple, such
# as `Zeros((size(w, ndims(w)),))`, working: a tuple is neither a `Type` nor an
# `Integer`/`Colon`, and no default outer constructor exists because `T` does
# not appear in any field type, so that call would otherwise have no method.
Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...)
Zeros(sz::Tuple) = Zeros(Bool, sz...)
|
||||||
size(xs::ZeroType) = xs.size
|
|
||||||
reshape(::ZeroType, args...) = ZeroType(args)
|
# Adding a `Zeros` placeholder on the right returns the other operand unchanged.
function +(x::Number, ::Zeros)
    return x
end
|
||||||
@adjoint reshape(xs::ZeroType, dims...) =
|
# Adding a `Zeros` placeholder on the left returns the other operand unchanged.
+(::Zeros, a::Number) = a
# `Zeros <: Number`, so `Zeros + Zeros` matches both this method and
# `+(a::Number, ::Zeros)` with neither being more specific. Resolve the
# ambiguity explicitly; the sum of two placeholders is the left placeholder.
+(z::Zeros, ::Zeros) = z
|
||||||
ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...)
|
|
||||||
|
# Report the dimensions recorded in the placeholder's `size` field.
function size(z::Zeros)
    return z.size
end
|
||||||
|
# Reshaping a placeholder builds a new `Zeros` with the same element type `T`
# from the requested dimensions; the input's own size is not consulted.
function reshape(::Zeros{T}, dims...) where {T}
    return Zeros(T, dims...)
end
|
||||||
|
|
||||||
|
@adjoint reshape(xs::Zeros{T}, dims...) where T =
|
||||||
|
Zeros(T, dims...), Δ -> (Zeros(T, size(xs)...), map(_ -> nothing, dims)...)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@jit ...
|
@jit ...
|
||||||
|
@ -28,13 +28,13 @@ end
|
|||||||
op = bias(ip)
|
op = bias(ip)
|
||||||
@test sum(op) == prod(size(op))
|
@test sum(op) == prod(size(op))
|
||||||
|
|
||||||
bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,)))
|
bias = Conv((2,2), 1=>3, bias = Flux.Zeros())
|
||||||
op = bias(ip)
|
op = bias(ip)
|
||||||
@test sum(op) === 0.f0
|
@test sum(op) === 0.f0
|
||||||
|
|
||||||
# Train w/o bias and make sure no convergence happens
|
# Train w/o bias and make sure no convergence happens
|
||||||
# when only bias can be converged
|
# when only bias can be converged
|
||||||
bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,)));
|
bias = Conv((2, 2), 1=>3, bias = Flux.Zeros());
|
||||||
ip = zeros(Float32, 28,28,1,1)
|
ip = zeros(Float32, 28,28,1,1)
|
||||||
op = zeros(Float32, 27,27,3,1) .+ 2.f0
|
op = zeros(Float32, 27,27,3,1) .+ 2.f0
|
||||||
opt = Descent()
|
opt = Descent()
|
||||||
|
Loading…
Reference in New Issue
Block a user