Merge #873
873: Make bias optional
r=MikeInnes a=dhairyagandhi96
Addresses #868
Co-authored-by: Dhairya Gandhi <dhairya@juliacopmuting.com>
Commit 5d9acc7e73
@@ -27,6 +27,7 @@ using CuArrays
 const use_cuda = Ref(false)
 
 include("utils.jl")
+include("zeros.jl")
 include("onehot.jl")
 include("functor.jl")
@@ -30,27 +30,36 @@ function calc_padding(::SamePad, k::NTuple{N,T}, dilation, stride) where {N,T}
 end
 
 """
-    Conv(size, in => out, σ = identity; init = glorot_uniform,
+    Conv(filter, in => out, σ = identity; init = glorot_uniform,
          stride = 1, pad = 0, dilation = 1)
 
-Standard convolutional layer. `size` should be a tuple like `(2, 2)`.
+    filter = (2,2)
+    in = 1
+    out = 16
+    Conv((2, 2), 1=>16, relu)
+
+Standard convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
+Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
+Setting `bias` to `Flux.Zeros()` will switch bias off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
 Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 
 # Examples
 
-Apply a `Conv` layer to a 1-channel input using a 2×2 window size, giving us a
+Apply a `Conv` layer to a 1-channel input using a 2×2 window filter size, giving us a
 16-channel output. Output is activated with ReLU.
 ```julia
-size = (2,2)
+filter = (2,2)
 in = 1
 out = 16
-Conv(size, in => out, relu)
+Conv(filter, in => out, relu)
 ```
 """
 struct Conv{N,M,F,A,V}
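As a quick, hedged sketch of what this docstring describes (sizes are arbitrary, and the `bias` keyword comes from the constructor changes later in this diff):

```julia
using Flux

conv        = Conv((2, 2), 1=>16, relu)                       # weight and bias both trainable
conv_nobias = Conv((2, 2), 1=>16, relu, bias = Flux.Zeros())  # bias fixed at zero via the new keyword

x = rand(Float32, 28, 28, 1, 1)   # WHCN: width, height, channels, batch
size(conv_nobias(x))              # expected (27, 27, 16, 1) for a 2×2 filter at stride 1
```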
@@ -62,7 +71,28 @@ struct Conv{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
+"""
+    Conv(weight::AbstractArray, bias::AbstractArray)
+    Conv(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the convolutional layer with user defined weight and bias arrays.
+
+Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+There is also a keyword-only constructor available for all convolutional
+layers.
+
+```julia
+weight = rand(Float32, 3, 3, 5)
+bias = zeros(Float32, 5)
+Conv(weight = weight,
+     bias = bias,
+     activation = sigmoid)
+```
+"""
+function Conv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
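For concreteness, a minimal sketch of the two explicit-array forms introduced here (the shapes are illustrative assumptions: a 3×3 filter mapping 1 channel to 5; `glorot_uniform` and `sigmoid` come from Flux):

```julia
using Flux

weight = Flux.glorot_uniform(3, 3, 1, 5)   # W×H×Cin×Cout for a 2-D convolution
bias   = zeros(Float32, 5)                 # one bias value per output channel

c1 = Conv(weight, bias, sigmoid)                               # positional form
c2 = Conv(weight = weight, bias = bias, activation = sigmoid)  # keyword-only form
c3 = Conv(weight, Flux.Zeros(), sigmoid)                       # same layer, bias switched off
```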
@@ -70,10 +100,32 @@ function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
   return Conv(σ, w, b, stride, pad, dilation)
 end
 
-Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-     init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
-  Conv(init(k..., ch...), zeros(ch[2]), σ,
-       stride = stride, pad = pad, dilation = dilation)
+function Conv(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}},
+              activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N}
+  Conv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation)
+end
+
+"""
+    convfilter(filter::Tuple, in=>out)
+
+Constructs a standard convolutional weight matrix with given `filter` and
+channels from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`depthwiseconvfilter`](@ref)
+"""
+convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+           init = glorot_uniform) where N = init(filter..., ch...)
+
+function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+              init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
+              weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N
+
+  Conv(weight, bias, σ,
+       stride = stride, pad = pad, dilation = dilation)
+end
 
 @functor Conv
 
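A small sketch of how `convfilter` feeds the positional constructor (sizes are arbitrary; reaching it as `Flux.convfilter` is an assumption about how it is exposed):

```julia
using Flux

w = Flux.convfilter((3, 3), 4 => 8)   # 3×3×4×8 weight array, glorot_uniform by default
b = zeros(Float32, 8)

layer = Conv(w, b, relu)   # equivalent to Conv((3, 3), 4 => 8, relu) up to the random draw
```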
@@ -114,16 +166,22 @@ outdims(l::Conv, isize) =
   output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
 
 """
-    ConvTranspose(size, in => out, σ = identity; init = glorot_uniform,
+    ConvTranspose(filter, in=>out)
+    ConvTranspose(filter, in=>out, activation)
+    ConvTranspose(filter, in => out, σ = identity; init = glorot_uniform,
                   stride = 1, pad = 0, dilation = 1)
 
-Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
+Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
+Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
+Setting `bias` to `Flux.Zeros()` will switch bias off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
 Use `pad=SamePad()` to apply padding so that outputsize == stride * inputsize - stride + 1.
 """
 struct ConvTranspose{N,M,F,A,V}
@@ -135,18 +193,39 @@ struct ConvTranspose{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-                       stride = 1, pad = 0, dilation = 1) where {T,N}
+"""
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray)
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the convolutional transpose layer with user defined weight and bias arrays.
+
+Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+For the keyword-only constructor, see also [`Conv`](@ref).
+"""
+function ConvTranspose(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity;
+                       stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
 
-ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-              init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
-  ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ,
+function ConvTranspose(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}},
+                       activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N}
+  ConvTranspose(weight, bias, activation, stride = stride, pad = pad, dilation = dilation)
+end
+
+function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+                       init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
+                       weight = convfilter(k, reverse(ch), init = init), bias = zeros(ch[2])) where N
+
+  ConvTranspose(weight, bias, σ,
                 stride = stride, pad = pad, dilation = dilation)
+end
 
 @functor ConvTranspose
 
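As a brief, hedged illustration of the resulting API (sizes chosen arbitrarily):

```julia
using Flux

up = ConvTranspose((2, 2), 3 => 1, relu, bias = Flux.Zeros())  # transpose conv without a trainable bias
x  = rand(Float32, 14, 14, 3, 1)
size(up(x))   # expected (15, 15, 1, 1): spatial dims grow by filter - 1 at stride 1
```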
@@ -158,9 +237,9 @@ function conv_transpose_dims(c::ConvTranspose, x::AbstractArray)
   batch_size = size(x)[end]
   # Create DenseConvDims() that looks like the corresponding conv()
   return DenseConvDims((I..., C_in, batch_size), size(c.weight);
                        stride=c.stride,
                        padding=c.pad,
                        dilation=c.dilation,
   )
 end
 
@@ -171,7 +250,7 @@ function (c::ConvTranspose)(x::AbstractArray)
   # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1)))
   σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1)
   cdims = conv_transpose_dims(c, x)
-  return σ.(∇conv_data(x, c.weight, cdims) .+ b)
+  σ.(∇conv_data(x, c.weight, cdims) .+ b)
 end
 
 function Base.show(io::IO, l::ConvTranspose)
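For readers of this hunk: the forward pass of a transpose convolution is computed as the data gradient of an ordinary convolution, which is why the layer calls `∇conv_data` with `DenseConvDims` describing the equivalent forward convolution.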
@@ -190,10 +269,12 @@ end
 outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize[1:2], size(l.weight)[1:N], l.stride, l.dilation, l.pad)
 
 """
-    DepthwiseConv(size, in => out, σ = identity; init = glorot_uniform,
+    DepthwiseConv(filter::Tuple, in=>out)
+    DepthwiseConv(filter::Tuple, in=>out, activation)
+    DepthwiseConv(filter, in => out, σ = identity; init = glorot_uniform,
                   stride = 1, pad = 0, dilation = 1)
 
-Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
+Depthwise convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 Note that `out` must be an integer multiple of `in`.
 
@@ -201,6 +282,10 @@ Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
+Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
+Setting `bias` to `Flux.Zeros()` will switch bias off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
 Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 """
 struct DepthwiseConv{N,M,F,A,V}
@@ -212,20 +297,54 @@ struct DepthwiseConv{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-                       stride = 1, pad = 0, dilation = 1) where {T,N}
+"""
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray)
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
+
+Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+For the keyword-only constructor, see also [`Conv`](@ref).
+"""
+function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity;
+                       stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
 
+function DepthwiseConv(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}},
+                       activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N}
+  DepthwiseConv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation)
+end
+
+"""
+    depthwiseconvfilter(filter::Tuple, in=>out)
+
+Constructs a depthwise convolutional weight array defined by `filter` and channels
+from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`convfilter`](@ref)
+"""
+depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+                    init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1])
+
 function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-                       init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N
+                       init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
+                       weight = depthwiseconvfilter(k, ch, init = init), bias = zeros(ch[2])) where N
   @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
 
   return DepthwiseConv(
-    init(k..., div(ch[2], ch[1]), ch[1]),
-    zeros(ch[2]),
+    weight,
+    bias,
     σ;
     stride = stride,
     pad = pad,
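A minimal sketch of the depthwise variant under this patch (channel counts are arbitrary, but `out` must be a multiple of `in`):

```julia
using Flux

dw = DepthwiseConv((3, 3), 3 => 6, relu, bias = Flux.Zeros())  # two filters per input channel, bias off
x  = rand(Float32, 32, 32, 3, 1)
size(dw(x))   # expected (30, 30, 6, 1)
```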
@@ -258,24 +377,30 @@ outdims(l::DepthwiseConv, isize) =
   output_size(DepthwiseConvDims(_paddims(isize, (1, 1, size(l.weight)[end], 1)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation))
 
 """
-    CrossCor(size, in => out, σ = identity; init = glorot_uniform,
+    CrossCor(filter, in=>out)
+    CrossCor(filter, in=>out, activation)
+    CrossCor(filter, in => out, σ = identity; init = glorot_uniform,
              stride = 1, pad = 0, dilation = 1)
 
-Standard cross convolutional layer. `size` should be a tuple like `(2, 2)`.
+Standard cross convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order (width, height, # channels, batch size).
 In other words, a 100×100 RGB image would be a `100×100×3×1` array,
 and a batch of 50 would be a `100×100×3×50` array.
 
+Accepts keyword arguments `weight` and `bias` to set the corresponding fields.
+Setting `bias` to `Flux.Zeros()` will switch bias off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
 Use `pad=SamePad()` to apply padding so that outputsize == inputsize / stride.
 
 # Examples
 
-Apply a `CrossCor` layer to a 1-channel input using a 2×2 window size, giving us a
+Apply a `CrossCor` layer to a 1-channel input using a 2×2 window filter size, giving us a
 16-channel output. Output is activated with ReLU.
 ```julia
-size = (2,2)
+filter = (2,2)
 in = 1
 out = 16
 CrossCor((2, 2), 1=>16, relu)
@@ -290,18 +415,39 @@ struct CrossCor{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity;
-                  stride = 1, pad = 0, dilation = 1) where {T,N}
+"""
+    CrossCor(weight::AbstractArray, bias::AbstractArray)
+    CrossCor(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the standard cross convolutional layer with user defined weight and bias
+arrays.
+
+Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+
+For the keyword-only constructor, see also [`Conv`](@ref).
+"""
+function CrossCor(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity;
+                  stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(pad, size(w)[1:N-2], dilation, stride)
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
 
-CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
-         init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N =
-  CrossCor(init(k..., ch...), zeros(ch[2]), σ,
+function CrossCor(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}},
+                  activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N}
+  CrossCor(weight, bias, activation, stride = stride, pad = pad, dilation = dilation)
+end
+
+function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+                  init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
+                  weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N
+
+  CrossCor(weight, bias, σ,
           stride = stride, pad = pad, dilation = dilation)
+end
 
 @functor CrossCor
 
@@ -0,0 +1,106 @@
+import Base: +, -, *, reshape, size
+import Base.Broadcast: broadcasted, Broadcasted, BroadcastStyle
+
+"""
+    Zeros()
+    Zeros(size...)
+    Zeros(Type, size...)
+
+Acts as a stand-in for an array of zeros that can be
+used during training, and is ignored by the optimisers.
+
+Useful to turn bias off for a forward pass of a layer.
+
+## Examples
+
+```julia
+julia> Flux.Zeros(3,3)
+3×3 Flux.Zeros{Bool,2}:
+ false  false  false
+ false  false  false
+ false  false  false
+
+julia> Flux.Zeros(Float32, 3,3)
+3×3 Flux.Zeros{Float32,2}:
+ 0.0  0.0  0.0
+ 0.0  0.0  0.0
+ 0.0  0.0  0.0
+
+julia> rand(3,3) .+ Flux.Zeros()
+3×3 Array{Float64,2}:
+ 0.198739  0.490459  0.785386
+ 0.779074  0.39986   0.66383
+ 0.854981  0.447292  0.314497
+
+julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros())
+Conv((2, 2), 1=>3)
+```
+"""
+struct Zeros{T,N} <: AbstractArray{T,N}
+  size::Tuple
+end
+
+Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz)
+Zeros(sz::Integer...) = Zeros(Bool, sz...)
+
+Base.size(xs::Zeros) = xs.size
+Base.axes(xs::Zeros) = Base.OneTo.(size(xs))
+
+Base.IndexStyle(::Type{<:Zeros}) = IndexLinear()
+
+Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T)
+Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} =
+  Zeros(T, length(inds))
+
+Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs))
+
+@adjoint reshape(xs::Zeros{T}, dims...) where T =
+  reshape(xs, dims...), _ -> nothing
+
+# Define basic ops
+for f in (:+, :-)
+  @eval @inline function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros)
+    @assert size(a) == size(b) throw(DimensionMismatch("dimensions must match"))
+    a
+  end
+end
+
++(a::Zeros, b::AbstractArray) = b + a
+-(a::Zeros, b::AbstractArray) = -b + a
+
+Base.copy(xs::Zeros{T,N}) where {T,N} = xs
+
+# Define broadcasting behaviour
+for op in (:+, :-)
+  @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros)
+    bs = Broadcast.broadcast_shape(size(a), size(b))
+    size(a) == bs && return a
+    sz = similar(a, bs)
+    sz .= a
+  end
+end
+
+broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a)
+broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a)
+
+function broadcasted(::typeof(*), a::AbstractArray, b::Zeros)
+  Zeros(Broadcast.broadcast_shape(size(a), size(b))...)
+end
+
+broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a)
+
+for op in (:+, :-, :*)
+  @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...)
+end
+
+# Some opportunities to avoid scalar indexing, intermediaries
+# Since it replicates a little of what we expect Base to do,
+# it should be possible to remove in the future, but for now,
+# these help with performance.
+broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a
+broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b
+broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a
+broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b
+broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a)
+broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b)
+broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b)
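To make the intent of these definitions concrete, a small, hedged sketch of how `Zeros` is expected to behave once this file is part of Flux (the comments state expectations, not captured REPL output):

```julia
using Flux

a = ones(Float32, 3)

a .+ Flux.Zeros(3)         # broadcasting + against a shaped Zeros should hand back `a` untouched
a .* Flux.Zeros()          # the zero-dimensional stand-in annihilates under *, giving an all-zero array
collect(Flux.Zeros(2, 2))  # materialises an actual 2×2 array of (Bool) zeros when a real array is needed
```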
@@ -25,6 +25,35 @@ end
               Dense(288, 10), softmax)
 
   @test size(m(r)) == (10, 5)
 
+  # Test bias switch
+  bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3))
+  ip = zeros(Float32, 28,28,1,1)
+
+  op = bias(ip)
+  @test sum(op) == prod(size(op))
+
+  bias = Conv((2,2), 1=>3, bias = Flux.Zeros())
+  op = bias(ip)
+  @test sum(op) === 0.f0
+  gs = gradient(() -> sum(bias(ip)), Flux.params(bias))
+  @test gs[bias.bias] == nothing
+
+  # Train w/o bias and make sure no convergence happens
+  # when only bias can be converged
+  bias = Conv((2, 2), 1=>3, bias = Flux.Zeros());
+  ip = zeros(Float32, 28,28,1,1)
+  op = zeros(Float32, 27,27,3,1) .+ 2.f0
+  opt = Descent()
+
+  for _ = 1:10^3
+    gs = gradient(params(bias)) do
+      Flux.mse(bias(ip), op)
+    end
+    Flux.Optimise.update!(opt, params(bias), gs)
+  end
+
+  @test Flux.mse(bias(ip), op) ≈ 4.f0
 end
 
 @testset "asymmetric padding" begin
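For context on the final assertion: with the bias fixed at zero and an all-zero input, the layer's output stays identically zero no matter how long it trains, so against a target of 2 everywhere the mean squared error settles at 2² = 4.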