From 5ea6a33f443a4efe1fb2a2e045f501e67399fbc8 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 27 Sep 2019 11:48:12 +0530 Subject: [PATCH 01/39] make bias optional --- src/layers/conv.jl | 19 ++++++++++++------- test/layers/conv.jl | 11 +++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 519f129f..26a34306 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct Conv{N,M,F,A,V} σ::F @@ -30,29 +30,34 @@ struct Conv{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return Conv(σ, w, b, stride, pad, dilation) + return Conv(σ, w, b, stride, pad, dilation, use_bias) end Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = Conv(init(k..., ch...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) @functor Conv function (c::Conv)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(conv(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(conv(x, c.weight, cdims) .+ b) + else + c.σ.(conv(x, c.weight, cdims)) + end end function Base.show(io::IO, l::Conv) diff --git a/test/layers/conv.jl b/test/layers/conv.jl index aa3925f1..2ac61e24 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -20,6 +20,17 @@ end Dense(288, 10), softmax) @test size(m(r)) == (10, 5) + + # Test bias switch + bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3)) + ip = zeros(Float32, 28,28,1,1) + + op = bias(ip) + @test sum(op) == prod(size(op)) + + bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3), use_bias = false) + op = bias(ip) + @test sum(op) === 0.f0 end @testset "asymmetric padding" begin From 9f2ac8fdef99b2257d566af0f41d46c7a5f57172 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 27 Sep 2019 12:04:27 +0530 Subject: [PATCH 02/39] ditto remaining conv layers --- src/layers/conv.jl | 52 ++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 26a34306..a427c143 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -92,20 +92,21 @@ struct ConvTranspose{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 
1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return ConvTranspose(σ, w, b, stride, pad, dilation) + return ConvTranspose(σ, w, b, stride, pad, dilation, use_bias) end ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) @functor ConvTranspose @@ -125,9 +126,13 @@ end function (c::ConvTranspose)(x::AbstractArray) # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = conv_transpose_dims(c, x) - return σ.(∇conv_data(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(∇conv_data(x, c.weight, cdims) .+ b) + else + c.σ.(∇conv_data(x, c.weight, cdims)) + end end function Base.show(io::IO, l::ConvTranspose) @@ -162,18 +167,19 @@ struct DepthwiseConv{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return DepthwiseConv(σ, w, b, stride, pad, dilation) + return DepthwiseConv(σ, w, b, stride, pad, dilation, use_bias) end function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" return DepthwiseConv( init(k..., div(ch[2], ch[1]), ch[1]), @@ -181,16 +187,21 @@ function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = σ; stride = stride, pad = pad, - dilation = dilation + dilation = dilation, + use_bias = use_bias ) end @functor DepthwiseConv function (c::DepthwiseConv)(x) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(depthwiseconv(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(depthwiseconv(x, c.weight, cdims) .+ b) + else + c.σ.(depthwiseconv(x, c.weight, cdims)) + end end function Base.show(io::IO, l::DepthwiseConv) @@ -234,20 +245,21 @@ struct CrossCor{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return CrossCor(σ, w, b, stride, pad, dilation) + return CrossCor(σ, w, b, stride, pad, dilation, use_bias) end CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, 
dilation = 1) where N = + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = CrossCor(init(k..., ch...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) @functor CrossCor @@ -259,9 +271,13 @@ end function (c::CrossCor)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(crosscor(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(crosscor(x, c.weight, cdims) .+ b) + else + c.σ.(crosscor(x, c.weight, cdims)) + end end function Base.show(io::IO, l::CrossCor) From a801fcb9e7e5075bad34cd0fde3d4eb85828cb5d Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 27 Sep 2019 12:07:55 +0530 Subject: [PATCH 03/39] docstrings --- src/layers/conv.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index a427c143..f77fb58c 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -83,7 +83,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,M,F,A,V} σ::F @@ -158,7 +158,7 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct DepthwiseConv{N,M,F,A,V} σ::F @@ -236,7 +236,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct CrossCor{N,M,F,A,V} σ::F From dced8c04e5e605c925433f138a169fc394959f93 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 1 Oct 2019 21:25:07 +0530 Subject: [PATCH 04/39] use ZeroType --- src/layers/conv.jl | 108 ++++++++++++++++++++------------------------ src/utils.jl | 10 ++++ test/layers/conv.jl | 6 ++- 3 files changed, 64 insertions(+), 60 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f77fb58c..a8ab158f 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" struct Conv{N,M,F,A,V} σ::F @@ -30,34 +30,32 @@ struct Conv{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} - use_bias::Bool end -function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} +function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return Conv(σ, w, b, stride, pad, dilation, use_bias) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + return Conv(σ, w, b, stride, pad, dilation) end -Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = - Conv(init(k..., ch...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) +function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N + b = use_bias ? zeros(ch[2]) : ZeroType((ch[2],)) + Conv(init(k..., ch...), b, σ, + stride = stride, pad = pad, dilation = dilation) +end @functor Conv function (c::Conv)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - if c.use_bias - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(conv(x, c.weight, cdims) .+ b) - else - c.σ.(conv(x, c.weight, cdims)) - end + σ.(conv(x, c.weight, cdims) .+ b) end function Base.show(io::IO, l::Conv) @@ -83,7 +81,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,M,F,A,V} σ::F @@ -92,21 +90,23 @@ struct ConvTranspose{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} - use_bias::Bool end -function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} +function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return ConvTranspose(σ, w, b, stride, pad, dilation, use_bias) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + return ConvTranspose(σ, w, b, stride, pad, dilation) end -ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = -ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) +function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N + b = use_bias ? 
zeros(ch[2]) : ZeroType((ch[2], )) + ConvTranspose(init(k..., reverse(ch)...), b, σ, + stride = stride, pad = pad, dilation = dilation) +end @functor ConvTranspose @@ -126,13 +126,9 @@ end function (c::ConvTranspose)(x::AbstractArray) # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = conv_transpose_dims(c, x) - if c.use_bias - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(∇conv_data(x, c.weight, cdims) .+ b) - else - c.σ.(∇conv_data(x, c.weight, cdims)) - end + σ.(∇conv_data(x, c.weight, cdims) .+ b) end function Base.show(io::IO, l::ConvTranspose) @@ -158,7 +154,7 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct DepthwiseConv{N,M,F,A,V} σ::F @@ -167,41 +163,37 @@ struct DepthwiseConv{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} - use_bias::Bool end -function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return DepthwiseConv(σ, w, b, stride, pad, dilation, use_bias) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + return DepthwiseConv(σ, w, b, stride, pad, dilation) end function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" + b = use_bias ? zeros(ch[2]) : ZeroType((ch[2], )) return DepthwiseConv( init(k..., div(ch[2], ch[1]), ch[1]), - zeros(ch[2]), + b, σ; stride = stride, pad = pad, - dilation = dilation, - use_bias = use_bias + dilation = dilation ) end @functor DepthwiseConv function (c::DepthwiseConv)(x) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - if c.use_bias - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(depthwiseconv(x, c.weight, cdims) .+ b) - else - c.σ.(depthwiseconv(x, c.weight, cdims)) - end + σ.(depthwiseconv(x, c.weight, cdims) .+ b) end function Base.show(io::IO, l::DepthwiseConv) @@ -236,7 +228,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" struct CrossCor{N,M,F,A,V} σ::F @@ -245,21 +237,23 @@ struct CrossCor{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} - use_bias::Bool end -function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} +function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return CrossCor(σ, w, b, stride, pad, dilation, use_bias) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + return CrossCor(σ, w, b, stride, pad, dilation) end -CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = - CrossCor(init(k..., ch...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) +function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N + b = use_bias ? zeros(ch[2]) : ZeroType((ch[2],)) + CrossCor(init(k..., ch...), b, σ, + stride = stride, pad = pad, dilation = dilation) +end @functor CrossCor @@ -271,13 +265,9 @@ end function (c::CrossCor)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - if c.use_bias - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) - σ.(crosscor(x, c.weight, cdims) .+ b) - else - c.σ.(crosscor(x, c.weight, cdims)) - end + σ.(crosscor(x, c.weight, cdims) .+ b) end function Base.show(io::IO, l::CrossCor) diff --git a/src/utils.jl b/src/utils.jl index 246c30d7..0507efa5 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,6 +139,16 @@ function throttle(f, timeout; leading=true, trailing=false) end end +import Base: +, reshape, size +struct ZeroType{T} <: Number + size::T +end ++(a::Number, ::ZeroType) = a ++(::ZeroType, a::Number) = a +size(xs::ZeroType) = xs.size +reshape(::ZeroType, args...) = ZeroType(args) +@adjoint reshape(xs::ZeroType, dims...) = ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...) + """ @jit ... diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 2ac61e24..fe5c575b 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -28,7 +28,11 @@ end op = bias(ip) @test sum(op) == prod(size(op)) - bias = Conv(ones(Float32, 2, 2, 1, 3), ones(Float32, 3), use_bias = false) + bias = Conv(ones(Float32, 2, 2, 1, 3), Flux.ZeroType((3,))) + op = bias(ip) + @test sum(op) === 0.f0 + + bias = Conv(ones(Float32, 2, 2, 1, 3), nothing) op = bias(ip) @test sum(op) === 0.f0 end From 1fe321781b38edc48233cf3a3a47dd54b81e569b Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 1 Oct 2019 21:29:18 +0530 Subject: [PATCH 05/39] add to docs --- src/layers/conv.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index a8ab158f..2a5ab981 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). 
In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct Conv{N,M,F,A,V} σ::F @@ -81,7 +81,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,M,F,A,V} σ::F @@ -154,7 +154,7 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct DepthwiseConv{N,M,F,A,V} σ::F @@ -228,7 +228,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `pad`, `stride` and `dilation`. +Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. """ struct CrossCor{N,M,F,A,V} σ::F From 55ef7c1aba83479610b6ed0d8d48ffc769304f68 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:25:23 +0530 Subject: [PATCH 06/39] add weight and bias kwargs --- src/layers/conv.jl | 23 ++++++++++++++++++----- src/utils.jl | 9 --------- test/layers/conv.jl | 6 +----- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 2a5ab981..8a9edb64 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -32,19 +32,32 @@ struct Conv{N,M,F,A,V} dilation::NTuple{N,Int} end -function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +""" + Conv(weight::AbstractArray, bias::AbstractArray) + Conv(weight::AbstractArray, bias::AbstractArray, relu) + +Constructs the convolutional layer with user defined weight and bias arrays. +All other behaviours of the Conv layer apply with regard to data order and +forward pass. + +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return Conv(σ, w, b, stride, pad, dilation) end +convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; init = glorot_uniform) = init(k..., ch...) +const convbias = zeros + function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N - b = use_bias ? 
zeros(ch[2]) : ZeroType((ch[2],)) - Conv(init(k..., ch...), b, σ, + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N + + Conv(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) end diff --git a/src/utils.jl b/src/utils.jl index 0507efa5..a12b59b7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,15 +139,6 @@ function throttle(f, timeout; leading=true, trailing=false) end end -import Base: +, reshape, size -struct ZeroType{T} <: Number - size::T -end -+(a::Number, ::ZeroType) = a -+(::ZeroType, a::Number) = a -size(xs::ZeroType) = xs.size -reshape(::ZeroType, args...) = ZeroType(args) -@adjoint reshape(xs::ZeroType, dims...) = ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...) """ @jit ... diff --git a/test/layers/conv.jl b/test/layers/conv.jl index fe5c575b..169c3077 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -28,11 +28,7 @@ end op = bias(ip) @test sum(op) == prod(size(op)) - bias = Conv(ones(Float32, 2, 2, 1, 3), Flux.ZeroType((3,))) - op = bias(ip) - @test sum(op) === 0.f0 - - bias = Conv(ones(Float32, 2, 2, 1, 3), nothing) + bias = Conv((2,2), 1=>3, bias = zero(3)) op = bias(ip) @test sum(op) === 0.f0 end From 48a305bd21183e1d8df664a31c8b22611603509b Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:41:06 +0530 Subject: [PATCH 07/39] ditto remaining layers --- src/layers/conv.jl | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 8a9edb64..ee2b8f79 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -42,7 +42,7 @@ forward pass. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function Conv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; +function Conv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) @@ -105,19 +105,19 @@ struct ConvTranspose{N,M,F,A,V} dilation::NTuple{N,Int} end -function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function ConvTranspose(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return ConvTranspose(σ, w, b, stride, pad, dilation) end function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N - b = use_bias ? 
zeros(ch[2]) : ZeroType((ch[2], )) - ConvTranspose(init(k..., reverse(ch)...), b, σ, + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convweight(k, reverse(ch), init = init), bias = convbias(ch[2])) where N + + ConvTranspose(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) end @@ -178,22 +178,24 @@ struct DepthwiseConv{N,M,F,A,V} dilation::NTuple{N,Int} end -function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return DepthwiseConv(σ, w, b, stride, pad, dilation) end +depthwiseconvweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; + init = glorot_uniform) where N = init(k..., div(ch[2], ch[1]), ch[1]) + function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = depthwiseconvweight(k, ch, init = init), bias = convbias(ch[2])) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" - b = use_bias ? zeros(ch[2]) : ZeroType((ch[2], )) return DepthwiseConv( - init(k..., div(ch[2], ch[1]), ch[1]), - b, + weight, + bias, σ; stride = stride, pad = pad, @@ -252,7 +254,7 @@ struct CrossCor{N,M,F,A,V} dilation::NTuple{N,Int} end -function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function CrossCor(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) @@ -262,9 +264,9 @@ function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVec end function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N - b = use_bias ? zeros(ch[2]) : ZeroType((ch[2],)) - CrossCor(init(k..., ch...), b, σ, + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N + CrossCor(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) end From e97d61f2575628527d5571163646a7e9d59e4c3a Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:42:26 +0530 Subject: [PATCH 08/39] fixes --- src/layers/conv.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index ee2b8f79..0acf3551 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -21,7 +21,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" struct Conv{N,M,F,A,V} σ::F From d00f833c17c2e18b4a5817390b93a1b35e4d8554 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:44:50 +0530 Subject: [PATCH 09/39] rm ZeroType --- src/layers/conv.jl | 1 - src/utils.jl | 1 - 2 files changed, 2 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 0acf3551..d05dfe4d 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -259,7 +259,6 @@ function CrossCor(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return CrossCor(σ, w, b, stride, pad, dilation) end diff --git a/src/utils.jl b/src/utils.jl index a12b59b7..246c30d7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,7 +139,6 @@ function throttle(f, timeout; leading=true, trailing=false) end end - """ @jit ... From 2ae3ad3b3182143423d8252db0261f1ff6a357a6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:46:13 +0530 Subject: [PATCH 10/39] doc fixes --- src/layers/conv.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index d05dfe4d..f02adb41 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -94,7 +94,7 @@ Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,M,F,A,V} σ::F @@ -167,7 +167,7 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct DepthwiseConv{N,M,F,A,V} σ::F @@ -243,7 +243,7 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. -Takes the keyword arguments `use_bias`, `pad`, `stride` and `dilation`. +Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct CrossCor{N,M,F,A,V} σ::F From 214f71f49273e135cfcffb93d87b92e041139128 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 04:55:33 +0530 Subject: [PATCH 11/39] add N --- src/layers/conv.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f02adb41..53d138db 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -50,7 +50,8 @@ function Conv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = i return Conv(σ, w, b, stride, pad, dilation) end -convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; init = glorot_uniform) = init(k..., ch...) +convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; + init = glorot_uniform) where N = init(k..., ch...) 
const convbias = zeros function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; From a1e826b888171541d32a6d59db2b14fdc62e95ff Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Sun, 6 Oct 2019 05:10:56 +0530 Subject: [PATCH 12/39] fixes --- src/layers/conv.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 53d138db..c7bda4ab 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -179,7 +179,7 @@ struct DepthwiseConv{N,M,F,A,V} dilation::NTuple{N,Int} end -function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number AbstractVector{T}}, σ = identity; +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) From f3904b4e0490bdd7a1b60c16bfc86372736f6cfa Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 17:17:36 +0530 Subject: [PATCH 13/39] add ZeroType back --- src/utils.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/utils.jl b/src/utils.jl index 246c30d7..a42c37d5 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,6 +139,19 @@ function throttle(f, timeout; leading=true, trailing=false) end end +import Base: +, reshape, size + +struct ZeroType{T} <: Number + size::T +end + ++(a::Number, ::ZeroType) = a ++(::ZeroType, a::Number) = a +size(xs::ZeroType) = xs.size +reshape(::ZeroType, args...) = ZeroType(args) +@adjoint reshape(xs::ZeroType, dims...) = + ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...) + """ @jit ... From 040697fb2bee3987f40bd5e8d3c3b6a815cbcfcf Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 17:18:19 +0530 Subject: [PATCH 14/39] add bias and weight kwarg --- src/layers/conv.jl | 108 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index c7bda4ab..5dcd400c 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -3,16 +3,16 @@ using NNlib: conv, ∇conv_data, depthwiseconv expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) """ - Conv(size, in=>out) - Conv(size, in=>out, relu) + Conv(filter::Tuple, in=>out) + Conv(filter::Tuple, in=>out, activation) -Standard convolutional layer. `size` should be a tuple like `(2, 2)`. +Standard convolutional layer. `filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. Example: Applying Conv layer to a 1-channel input using a 2x2 window size, giving us a 16-channel output. Output is activated with ReLU. - size = (2,2) + filter = (2,2) in = 1 out = 16 Conv((2, 2), 1=>16, relu) @@ -34,7 +34,7 @@ end """ Conv(weight::AbstractArray, bias::AbstractArray) - Conv(weight::AbstractArray, bias::AbstractArray, relu) + Conv(weight::AbstractArray, bias::AbstractArray, activation) Constructs the convolutional layer with user defined weight and bias arrays. All other behaviours of the Conv layer apply with regard to data order and @@ -42,21 +42,32 @@ forward pass. Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" -function Conv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; +function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return Conv(σ, w, b, stride, pad, dilation) end -convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(k..., ch...) -const convbias = zeros +""" + convweight(filter::Tuple, in=>out) + +Constructs a standard convolutional weight matrix with given `filter` and +channels from `in` to `out`. + +Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling +distribution. + +See also: [`depthwiseconvweight`](@ref) +""" +convweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; + init = glorot_uniform) where N = init(filter..., ch...) function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N + weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N Conv(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) @@ -86,10 +97,10 @@ end a(T.(x)) """ - ConvTranspose(size, in=>out) - ConvTranspose(size, in=>out, relu) + ConvTranspose(filter::Tuple, in=>out) + ConvTranspose(filter::Tuple, in=>out, relu) -Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`. +Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. Data should be stored in WHCN order. In other words, a 100×100 RGB image would @@ -106,17 +117,28 @@ struct ConvTranspose{N,M,F,A,V} dilation::NTuple{N,Int} end -function ConvTranspose(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; +""" + ConvTranspose(weight::AbstractArray, bias::AbstractArray) + ConvTranspose(weight::AbstractArray, bias::AbstractArray, activation) + +Constructs the convolutional transpose layer with user defined weight and bias arrays. +All other behaviours of the ConvTranspose layer apply with regard to data order and +forward pass. + +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return ConvTranspose(σ, w, b, stride, pad, dilation) end function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, reverse(ch), init = init), bias = convbias(ch[2])) where N + weight = convweight(k, reverse(ch), init = init), bias = zeros(ch[2])) where N ConvTranspose(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) @@ -157,11 +179,12 @@ end (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) -""" - DepthwiseConv(size, in=>out) - DepthwiseConv(size, in=>out, relu) -Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`. 
+""" + DepthwiseConv(filter::Tuple, in=>out) + DepthwiseConv(filter::Tuple, in=>out, relu) + +Depthwise convolutional layer. `filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. Note that `out` must be an integer multiple of `in`. @@ -179,21 +202,44 @@ struct DepthwiseConv{N,M,F,A,V} dilation::NTuple{N,Int} end -function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; +""" + DepthwiseConv(weight::AbstractArray, bias::AbstractArray) + DepthwiseConv(weight::AbstractArray, bias::AbstractArray, activation) + +Constructs the `DepthwiseConv` layer with user defined weight and bias arrays. +All other behaviours of the `DepthwiseConv` layer apply with regard to data order and +forward pass. + +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b return DepthwiseConv(σ, w, b, stride, pad, dilation) end -depthwiseconvweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(k..., div(ch[2], ch[1]), ch[1]) +""" + depthwiseconvweight(filter::Tuple, in=>out) + +Constructs a depthwise convolutional weight array defined by `filter` and channels +from `in` to `out`. + +Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling +distribution. + +See also: [`convweight`](@ref) +""" +depthwiseconvweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; + init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = depthwiseconvweight(k, ch, init = init), bias = convbias(ch[2])) where N + weight = depthwiseconvweight(k, ch, init = init), bias = zeros(ch[2])) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" + return DepthwiseConv( weight, bias, @@ -255,17 +301,29 @@ struct CrossCor{N,M,F,A,V} dilation::NTuple{N,Int} end -function CrossCor(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity; +""" + CrossCor(weight::AbstractArray, bias::AbstractArray) + CrossCor(weight::AbstractArray, bias::AbstractArray, activation) + +Constructs the standard cross convolutional layer with user defined weight and bias +arrays. All other behaviours of the CrossCor layer apply with regard to data order and +forward pass. + +Takes the keyword arguments `pad`, `stride` and `dilation`. +""" +function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) + b = b isa Nothing ? 
ZeroType((size(w, ndims(w)), )) : b return CrossCor(σ, w, b, stride, pad, dilation) end function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N + weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N + CrossCor(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) end From b596faaffabc31a48af433e6da5382defeeb8eb0 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 17:18:39 +0530 Subject: [PATCH 15/39] tests bias switch --- test/layers/conv.jl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 169c3077..5b3eb326 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -28,9 +28,25 @@ end op = bias(ip) @test sum(op) == prod(size(op)) - bias = Conv((2,2), 1=>3, bias = zero(3)) + bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,))) op = bias(ip) @test sum(op) === 0.f0 + + # Train w/o bias and make sure no convergence happens + # when only bias can be converged + bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,))); + ip = zeros(Float32, 28,28,1,1) + op = zeros(Float32, 27,27,3,1) .+ 2.f0 + opt = Descent() + + for _ = 1:10^3 + gs = gradient(params(bias)) do + Flux.mse(bias(ip), op) + end + Flux.Optimise.update!(opt, params(bias), gs) + end + + @test Flux.mse(bias(ip), op) ≈ 4.f0 end @testset "asymmetric padding" begin From 95c5845e99e7d4ccde36f090a4f9f9fdbe865f9c Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 17:54:01 +0530 Subject: [PATCH 16/39] document bias switch --- src/layers/conv.jl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 5dcd400c..58b6ccb5 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -21,6 +21,10 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. +Accepts keyword arguments `weight` and `bias` to set the corresponding fields. +Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct Conv{N,M,F,A,V} @@ -40,6 +44,9 @@ Constructs the convolutional layer with user defined weight and bias arrays. All other behaviours of the Conv layer apply with regard to data order and forward pass. +Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; @@ -106,6 +113,10 @@ Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)` Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. +Accepts keyword arguments `weight` and `bias` to set the corresponding fields. +Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct ConvTranspose{N,M,F,A,V} @@ -125,6 +136,9 @@ Constructs the convolutional transpose layer with user defined weight and bias a All other behaviours of the ConvTranspose layer apply with regard to data order and forward pass. 
+Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; @@ -191,6 +205,10 @@ Note that `out` must be an integer multiple of `in`. Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. +Accepts keyword arguments `weight` and `bias` to set the corresponding fields. +Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct DepthwiseConv{N,M,F,A,V} @@ -210,6 +228,9 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays. All other behaviours of the `DepthwiseConv` layer apply with regard to data order and forward pass. +Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; @@ -290,6 +311,10 @@ Data should be stored in WHCN order (width, height, # channels, # batches). In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. +Accepts keyword arguments `weight` and `bias` to set the corresponding fields. +Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ struct CrossCor{N,M,F,A,V} @@ -309,6 +334,9 @@ Constructs the standard cross convolutional layer with user defined weight and b arrays. All other behaviours of the CrossCor layer apply with regard to data order and forward pass. +Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +layer. + Takes the keyword arguments `pad`, `stride` and `dilation`. """ function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; From 49ea43e711ea98f9d36184d08aa37832413f29f5 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 20:02:04 +0530 Subject: [PATCH 17/39] ZeroType => Zeros --- src/layers/conv.jl | 32 ++++++++++++++++---------------- src/utils.jl | 28 ++++++++++++++++++++-------- test/layers/conv.jl | 4 ++-- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 58b6ccb5..ad9164c4 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -22,7 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. @@ -44,17 +44,17 @@ Constructs the convolutional layer with user defined weight and bias arrays. All other behaviours of the Conv layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" -function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return Conv(σ, w, b, stride, pad, dilation) end @@ -114,7 +114,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. @@ -136,17 +136,17 @@ Constructs the convolutional transpose layer with user defined weight and bias a All other behaviours of the ConvTranspose layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return ConvTranspose(σ, w, b, stride, pad, dilation) end @@ -206,7 +206,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. @@ -228,17 +228,17 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays. All other behaviours of the `DepthwiseConv` layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return DepthwiseConv(σ, w, b, stride, pad, dilation) end @@ -312,7 +312,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. 
Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.ZeroType((out,))` will switch bias off for the +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. @@ -334,17 +334,17 @@ Constructs the standard cross convolutional layer with user defined weight and b arrays. All other behaviours of the CrossCor layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.ZeroType((out,))` would switch `bias` off for the +Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity; +function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b + b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return CrossCor(σ, w, b, stride, pad, dilation) end diff --git a/src/utils.jl b/src/utils.jl index a42c37d5..9e095811 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -141,16 +141,28 @@ end import Base: +, reshape, size -struct ZeroType{T} <: Number - size::T +""" + Zeros() + Zeros(T, a::Union{Colon, Int}...) + +Acts as a stand-in for an array of zeros that can be used during training which is +ignored by the optimisers. +""" +struct Zeros{T} <: Number + size::Tuple end -+(a::Number, ::ZeroType) = a -+(::ZeroType, a::Number) = a -size(xs::ZeroType) = xs.size -reshape(::ZeroType, args...) = ZeroType(args) -@adjoint reshape(xs::ZeroType, dims...) = - ZeroType(dims), Δ -> (ZeroType(size(xs)), map(_ -> nothing, dims)...) +Zeros(::Type{T}, sz...) where T = Zeros{T}(sz) +Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...) + ++(a::Number, ::Zeros) = a ++(::Zeros, a::Number) = a + +size(xs::Zeros) = xs.size +reshape(z::Zeros{T}, args...) where T = Zeros(T, args...) + +@adjoint reshape(xs::Zeros{T}, dims...) where T = + Zeros(T, dims...), Δ -> (Zeros(T, size(xs)...), map(_ -> nothing, dims)...) """ @jit ... 
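For context, a minimal usage sketch of the bias switch as it stands after this
commit. This is illustration only, not part of the diff; it relies solely on the
`Flux.Zeros` type and the `bias` keyword shown above, and the array shapes are
arbitrary.

    using Flux

    # A convolution without a trainable bias: `Flux.Zeros()` stands in for the
    # bias vector, and adding it to a number or array leaves the other operand
    # unchanged.
    layer = Conv((2, 2), 1 => 3, bias = Flux.Zeros())

    x = rand(Float32, 28, 28, 1, 1)
    y = layer(x)    # same as applying the activation to conv(x, layer.weight)

    # Broadcasting Zeros over an array is a no-op on the array's values:
    a = rand(Float32, 3, 3)
    a .+ Flux.Zeros() == a    # true

Per the docstring added above, the stand-in bias is also meant to be ignored by
the optimisers; the updated test below trains such a layer and checks that its
output does not move.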
diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 5b3eb326..4a3f8c16 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -28,13 +28,13 @@ end op = bias(ip) @test sum(op) == prod(size(op)) - bias = Conv((2,2), 1=>3, bias = Flux.ZeroType((3,))) + bias = Conv((2,2), 1=>3, bias = Flux.Zeros()) op = bias(ip) @test sum(op) === 0.f0 # Train w/o bias and make sure no convergence happens # when only bias can be converged - bias = Conv((2, 2), 1=>3, bias = Flux.ZeroType((3,))); + bias = Conv((2, 2), 1=>3, bias = Flux.Zeros()); ip = zeros(Float32, 28,28,1,1) op = zeros(Float32, 27,27,3,1) .+ 2.f0 opt = Descent() From c85bad4427ca96631700c7c224317fa7fac7d439 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 8 Oct 2019 20:26:09 +0530 Subject: [PATCH 18/39] replace weight with filter --- src/layers/conv.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index ad9164c4..a60749e3 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -59,7 +59,7 @@ function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}} end """ - convweight(filter::Tuple, in=>out) + convfilter(filter::Tuple, in=>out) Constructs a standard convolutional weight matrix with given `filter` and channels from `in` to `out`. @@ -67,14 +67,14 @@ channels from `in` to `out`. Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling distribution. -See also: [`depthwiseconvweight`](@ref) +See also: [`depthwiseconvfilter`](@ref) """ -convweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; +convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; init = glorot_uniform) where N = init(filter..., ch...) function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N + weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N Conv(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) @@ -152,7 +152,7 @@ end function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, reverse(ch), init = init), bias = zeros(ch[2])) where N + weight = convfilter(k, reverse(ch), init = init), bias = zeros(ch[2])) where N ConvTranspose(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) @@ -243,7 +243,7 @@ function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractV end """ - depthwiseconvweight(filter::Tuple, in=>out) + depthwiseconvfilter(filter::Tuple, in=>out) Constructs a depthwise convolutional weight array defined by `filter` and channels from `in` to `out`. @@ -251,14 +251,14 @@ from `in` to `out`. Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling distribution. 
-See also: [`convweight`](@ref) +See also: [`convfilter`](@ref) """ -depthwiseconvweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; +depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = depthwiseconvweight(k, ch, init = init), bias = zeros(ch[2])) where N + weight = depthwiseconvfilter(k, ch, init = init), bias = zeros(ch[2])) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" return DepthwiseConv( @@ -350,7 +350,7 @@ end function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N + weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N CrossCor(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) From 4a183aeaf02a9de9a98f21ee5eddfd0e7f8219f4 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 22 Oct 2019 16:11:27 +0530 Subject: [PATCH 19/39] make Zeros a dimensionlesss number --- src/utils.jl | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 9e095811..ee5f2db7 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,30 +139,40 @@ function throttle(f, timeout; leading=true, trailing=false) end end -import Base: +, reshape, size +import Base: +, -, reshape, size +import Base.Broadcast: broadcasted """ Zeros() - Zeros(T, a::Union{Colon, Int}...) -Acts as a stand-in for an array of zeros that can be used during training which is -ignored by the optimisers. +Acts as a stand-in for an array of zeros that can be +used during training which is ignored by the optimisers. + +Used to turn bias off for a forward pass of a layer. + +## Examples + +```julia +julia> rand(3,3) .+ Flux.Zeros() +3×3 Array{Float64,2}: + 0.198739 0.490459 0.785386 + 0.779074 0.39986 0.66383 + 0.854981 0.447292 0.314497 + +julia> bias = Conv((2,2), 1=>3, bias = Flux.Zeros()) +Conv((2, 2), 1=>3) +``` """ -struct Zeros{T} <: Number - size::Tuple +struct Zeros <: Number end +for f in (:+, :-) + @eval $f(a::Union{Number, Zeros}, b::Zeros) = a end +Base.:*(a::Union{Number, Zeros}, b::Zeros) = zero(a) -Zeros(::Type{T}, sz...) where T = Zeros{T}(sz) -Zeros(sz::Union{Integer, Colon}...) = Zeros(Bool, sz...) - -+(a::Number, ::Zeros) = a -+(::Zeros, a::Number) = a - -size(xs::Zeros) = xs.size -reshape(z::Zeros{T}, args...) where T = Zeros(T, args...) - -@adjoint reshape(xs::Zeros{T}, dims...) where T = - Zeros(T, dims...), Δ -> (Zeros(T, size(xs)...), map(_ -> nothing, dims)...) +broadcasted(::typeof(+), arr::AbstractArray, ::Zeros) = arr +broadcasted(::typeof(*), arr::AbstractArray, ::Zeros) = zero(arr) +Base.reshape(xs::Zeros, args...) = xs +@adjoint reshape(xs::Zeros, dims...) = reshape(xs, dims...), _ -> nothing """ @jit ... 
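
With `Zeros` now a dimensionless number, turning bias off is a single keyword argument on the layer constructor. A usage sketch mirroring the updated tests, assuming Flux built from this point in the series rather than a released version:

```julia
using Flux

layer = Conv((2, 2), 1 => 3, bias = Flux.Zeros())   # bias-free convolution
x = zeros(Float32, 28, 28, 1, 1)

sum(layer(x)) == 0f0   # with zero input and no bias, the output is exactly zero
```
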
From 7c90fb469d19585d63d95aeb28e68041af7e35b7 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 23 Oct 2019 20:02:15 +0530 Subject: [PATCH 20/39] use array to define Zeros --- src/utils.jl | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index ee5f2db7..155326ab 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,16 +163,39 @@ julia> bias = Conv((2,2), 1=>3, bias = Flux.Zeros()) Conv((2, 2), 1=>3) ``` """ -struct Zeros <: Number end -for f in (:+, :-) - @eval $f(a::Union{Number, Zeros}, b::Zeros) = a +struct Zeros{T,N} <: AbstractArray{T,N} + size::Tuple end -Base.:*(a::Union{Number, Zeros}, b::Zeros) = zero(a) + +Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz) +Zeros(sz::Integer...) = Zeros(Bool, sz...) + ++(a::Union{AbstractVecOrMat, Number}, ::Zeros) = a + +Base.size(xs::Zeros) = xs.size +Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() + +Base.axes(xs::Zeros) = Base.OneTo.(size(xs)) + +Base.getindex(xs::Zeros{T,N}, i::Int) where {T,N} = zero(T) +Base.setindex(xs::Zeros, args...) = + error("setindex disallowed on Zeros Array") +Base.setindex!(xs::Zeros, args...) = + error("setindex! disallowed on Zeros Array") + +Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) + +@adjoint reshape(xs::Zeros{T}, dims...) where T = + reshape(xs, dims...), _ -> nothing + +for f in (:+, :-) + @eval $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a +end +Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a) broadcasted(::typeof(+), arr::AbstractArray, ::Zeros) = arr +broadcasted(::typeof(-), arr::AbstractArray, ::Zeros) = arr broadcasted(::typeof(*), arr::AbstractArray, ::Zeros) = zero(arr) -Base.reshape(xs::Zeros, args...) = xs -@adjoint reshape(xs::Zeros, dims...) = reshape(xs, dims...), _ -> nothing """ @jit ... From a4a987f0b0c7745a05f4322eaaa87f422ce990b6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 7 Nov 2019 16:53:41 +0530 Subject: [PATCH 21/39] hook into bcasting --- src/utils.jl | 66 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 155326ab..6e5ab8a2 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -139,27 +139,45 @@ function throttle(f, timeout; leading=true, trailing=false) end end -import Base: +, -, reshape, size -import Base.Broadcast: broadcasted +import Base: +, -, *, reshape, size +import Base.Broadcast: broadcasted, Broadcasted, BroadcastStyle """ Zeros() + Zeros(size...) + Zeros(Type, size...) Acts as a stand-in for an array of zeros that can be used during training which is ignored by the optimisers. -Used to turn bias off for a forward pass of a layer. +Useful to turn bias off for a forward pass of a layer. + +!!! warning + Zeros acts a scalar while broadcasting, so does not + expand dims. Checks for shape compatibility by default. ## Examples ```julia +julia> Flux.Zeros(3,3) +3×3 Flux.Zeros{Bool,2}: + false false false + false false false + false false false + +julia> Flux.Zeros(Float32, 3,3) +3×3 Flux.Zeros{Float32,2}: + 0.0 0.0 0.0 + 0.0 0.0 0.0 + 0.0 0.0 0.0 + julia> rand(3,3) .+ Flux.Zeros() 3×3 Array{Float64,2}: 0.198739 0.490459 0.785386 0.779074 0.39986 0.66383 0.854981 0.447292 0.314497 -julia> bias = Conv((2,2), 1=>3, bias = Flux.Zeros()) +julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros()) Conv((2, 2), 1=>3) ``` """ @@ -170,14 +188,15 @@ end Zeros(::Type{T}, sz...) 
where T = Zeros{T,length(sz)}(sz) Zeros(sz::Integer...) = Zeros(Bool, sz...) -+(a::Union{AbstractVecOrMat, Number}, ::Zeros) = a - Base.size(xs::Zeros) = xs.size -Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() - Base.axes(xs::Zeros) = Base.OneTo.(size(xs)) -Base.getindex(xs::Zeros{T,N}, i::Int) where {T,N} = zero(T) +Base.IndexStyle(::Type{<:Zeros}) = IndexCartesian() + +Base.getindex(xs::Zeros{T,N}, I::Vararg{Int, N}) where {T,N} = zero(T) +Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = + Zeros(T, inds.stop) + Base.setindex(xs::Zeros, args...) = error("setindex disallowed on Zeros Array") Base.setindex!(xs::Zeros, args...) = @@ -185,17 +204,40 @@ Base.setindex!(xs::Zeros, args...) = Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) +# Ignore during backwards pass @adjoint reshape(xs::Zeros{T}, dims...) where T = reshape(xs, dims...), _ -> nothing +# Define basic ops for f in (:+, :-) @eval $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a end +Base.:+(a::Zeros, b::AbstractArray) = b +Base.:-(a::Zeros, b::AbstractArray) = -b Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a) +Base.:*(a::Zeros, b::AbstractArray) = zero(a) -broadcasted(::typeof(+), arr::AbstractArray, ::Zeros) = arr -broadcasted(::typeof(-), arr::AbstractArray, ::Zeros) = arr -broadcasted(::typeof(*), arr::AbstractArray, ::Zeros) = zero(arr) +# Hook into broadcasting API - to allow using as a regular array +Base.BroadcastStyle(::Type{<:Zeros}) = Broadcast.ArrayStyle{Zeros}() +Broadcast.broadcastable(xs::Zeros) = xs +Base.BroadcastStyle(::Broadcast.ArrayStyle{Zeros}, ::Broadcast.DefaultArrayStyle{N}) where N = + Broadcast.ArrayStyle{Zeros}() + +function Base.similar(bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}, ::Type{T}) where T + similar(Array{T}, axes(bc)) +end + +Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...) + +isZeros(x::Zeros) = true +isZeros(x) = false + +function Base.copyto!(dest::AbstractArray, bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}) + bc = Broadcast.flatten(bc) + + i = isZeros(first(bc.args)) ? 2 : 1 # findfirst(!isZeros, bc.args) + dest .= bc.args[i] +end """ @jit ... From e89b8eba774fc32b3fd782352422fb88baee74e6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 13 Nov 2019 01:12:26 +0530 Subject: [PATCH 22/39] fixes --- src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 6e5ab8a2..ae2910cc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -215,7 +215,7 @@ end Base.:+(a::Zeros, b::AbstractArray) = b Base.:-(a::Zeros, b::AbstractArray) = -b Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a) -Base.:*(a::Zeros, b::AbstractArray) = zero(a) +Base.:*(a::Zeros, b::AbstractArray) = zero(b) # Hook into broadcasting API - to allow using as a regular array Base.BroadcastStyle(::Type{<:Zeros}) = Broadcast.ArrayStyle{Zeros}() From eb41715d26998d2ad711f1644ee0f7127dd01b14 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Tue, 19 Nov 2019 13:30:33 +0530 Subject: [PATCH 23/39] define manual rules --- src/utils.jl | 78 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index ae2910cc..57e62cca 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -191,54 +191,78 @@ Zeros(sz::Integer...) = Zeros(Bool, sz...) 
Base.size(xs::Zeros) = xs.size Base.axes(xs::Zeros) = Base.OneTo.(size(xs)) -Base.IndexStyle(::Type{<:Zeros}) = IndexCartesian() +Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() -Base.getindex(xs::Zeros{T,N}, I::Vararg{Int, N}) where {T,N} = zero(T) +Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T) Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = - Zeros(T, inds.stop) + Zeros(T, inds.stop) Base.setindex(xs::Zeros, args...) = - error("setindex disallowed on Zeros Array") + error("setindex disallowed on Zeros Array") Base.setindex!(xs::Zeros, args...) = - error("setindex! disallowed on Zeros Array") + error("setindex! disallowed on Zeros Array") Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) -# Ignore during backwards pass @adjoint reshape(xs::Zeros{T}, dims...) where T = - reshape(xs, dims...), _ -> nothing + reshape(xs, dims...), _ -> nothing # Define basic ops for f in (:+, :-) - @eval $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = a + @eval function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) + @assert size(a) == size(b) throw(DimensionMismatch("dimensions must match")) + a + end end -Base.:+(a::Zeros, b::AbstractArray) = b -Base.:-(a::Zeros, b::AbstractArray) = -b -Base.:*(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) = zero(a) -Base.:*(a::Zeros, b::AbstractArray) = zero(b) -# Hook into broadcasting API - to allow using as a regular array -Base.BroadcastStyle(::Type{<:Zeros}) = Broadcast.ArrayStyle{Zeros}() -Broadcast.broadcastable(xs::Zeros) = xs -Base.BroadcastStyle(::Broadcast.ArrayStyle{Zeros}, ::Broadcast.DefaultArrayStyle{N}) where N = - Broadcast.ArrayStyle{Zeros}() ++(a::Zeros, b::AbstractArray) = b + a +-(a::Zeros, b::AbstractArray) = -b + a -function Base.similar(bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}, ::Type{T}) where T - similar(Array{T}, axes(bc)) +function *(a::AbstractArray{S,2}, b::Zeros{T,2}) where {T,S} + @assert size(a,2) == size(b,1) throw(DimensionMismatch("A has dimensions $(size(a)) but B has dimensions $(size(b))")) + res = similar(a, size(a,1), size(b,2)) + res .= zero(S) +end + +function *(a::Zeros{T,2}, b::AbstractArray{S,2}) where {T,S} + @assert size(a,2) == size(b,1) throw(DimensionMismatch("A has dimensions $(size(a)) but B has dimensions $(size(b))")) + res = similar(b, size(a,1), size(b,2)) + res .= zero(S) end Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...) -isZeros(x::Zeros) = true -isZeros(x) = false - -function Base.copyto!(dest::AbstractArray, bc::Broadcasted{Broadcast.ArrayStyle{Flux.Zeros}}) - bc = Broadcast.flatten(bc) - - i = isZeros(first(bc.args)) ? 2 : 1 # findfirst(!isZeros, bc.args) - dest .= bc.args[i] +# Define broadcasting behaviour +for op in (:+, :-) + @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) + sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) + sz .= a + end end +broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(typeof(+), b, a) +broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(typeof(+), -b, a) + +function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) + sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) + sz .= zero(a) +end + +broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(typeof(*), b, a) + +for op in (:+, :-, :*) + @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) 
+end + +# Some opportunities to avoid scalar indexing, intermediaries +broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b +broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b +broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) +broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) + """ @jit ... From 245563077b614e78d6b765b3d24aae0612decc0e Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 27 Nov 2019 19:40:58 +0530 Subject: [PATCH 24/39] cleaner API --- src/layers/conv.jl | 70 +++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index a60749e3..751689f5 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -22,8 +22,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.Zeros()` will switch bias off for the -layer. +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ @@ -44,17 +43,15 @@ Constructs the convolutional layer with user defined weight and bias arrays. All other behaviours of the Conv layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the -layer. +Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function Conv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; +function Conv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity; stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return Conv(σ, w, b, stride, pad, dilation) end @@ -70,14 +67,14 @@ distribution. See also: [`depthwiseconvfilter`](@ref) """ convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(filter..., ch...) + init = glorot_uniform) where N = init(filter..., ch...) function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N Conv(weight, bias, σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation) end @functor Conv @@ -114,8 +111,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.Zeros()` will switch bias off for the -layer. +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. 
""" @@ -136,23 +132,21 @@ Constructs the convolutional transpose layer with user defined weight and bias a All other behaviours of the ConvTranspose layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the -layer. +Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} +function ConvTranspose(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return ConvTranspose(σ, w, b, stride, pad, dilation) end function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convfilter(k, reverse(ch), init = init), bias = zeros(ch[2])) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convfilter(k, reverse(ch), init = init), bias = zeros(ch[2])) where N ConvTranspose(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) @@ -168,9 +162,9 @@ function conv_transpose_dims(c::ConvTranspose, x::AbstractArray) batch_size = size(x)[end] # Create DenseConvDims() that looks like the corresponding conv() return DenseConvDims((I..., C_in, batch_size), size(c.weight); - stride=c.stride, - padding=c.pad, - dilation=c.dilation, + stride=c.stride, + padding=c.pad, + dilation=c.dilation, ) end @@ -206,8 +200,7 @@ Data should be stored in WHCN order. In other words, a 100×100 RGB image would be a `100×100×3` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.Zeros()` will switch bias off for the -layer. +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ @@ -228,17 +221,15 @@ Constructs the `DepthwiseConv` layer with user defined weight and bias arrays. All other behaviours of the `DepthwiseConv` layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the -layer. +Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} +function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? Zeros((size(w, ndims(w)), )) : b return DepthwiseConv(σ, w, b, stride, pad, dilation) end @@ -254,11 +245,11 @@ distribution. 
See also: [`convfilter`](@ref) """ depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) + init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = depthwiseconvfilter(k, ch, init = init), bias = zeros(ch[2])) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = depthwiseconvfilter(k, ch, init = init), bias = zeros(ch[2])) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" return DepthwiseConv( @@ -312,8 +303,7 @@ In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. Accepts keyword arguments `weight` and `bias` to set the corresponding fields. -Setting `bias` to `Flux.Zeros()` will switch bias off for the -layer. +Setting `bias` to `Flux.Zeros()` will switch bias off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ @@ -334,23 +324,21 @@ Constructs the standard cross convolutional layer with user defined weight and b arrays. All other behaviours of the CrossCor layer apply with regard to data order and forward pass. -Setting `bias` to `nothing` or `Flux.Zeros()` would switch `bias` off for the -layer. +Setting `bias` to `Flux.Zeros()` would switch `bias` off for the layer. Takes the keyword arguments `pad`, `stride` and `dilation`. """ -function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, Zeros, AbstractVector{T}}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} +function CrossCor(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - b = b isa Nothing ? 
Zeros((size(w, ndims(w)), )) : b return CrossCor(σ, w, b, stride, pad, dilation) end function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = convfilter(k, ch, init = init), bias = zeros(ch[2])) where N CrossCor(weight, bias, σ, stride = stride, pad = pad, dilation = dilation) From ec872bb57905023388eb7bb808e475edfaefcfd9 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 27 Nov 2019 19:45:04 +0530 Subject: [PATCH 25/39] test that bias has no grads with Zeros --- test/layers/conv.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 4a3f8c16..d3345929 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -31,6 +31,8 @@ end bias = Conv((2,2), 1=>3, bias = Flux.Zeros()) op = bias(ip) @test sum(op) === 0.f0 + gs = gradient(() -> sum(bias(ip)), Flux.params(bias)) + @test gs[bias.bias] == nothing # Train w/o bias and make sure no convergence happens # when only bias can be converged From f39e1848144a023d3ac2ba8a0d105121c234e018 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 9 Dec 2019 21:07:30 +0530 Subject: [PATCH 26/39] rm Zeros warning --- src/utils.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 57e62cca..7f244724 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -152,10 +152,6 @@ used during training which is ignored by the optimisers. Useful to turn bias off for a forward pass of a layer. -!!! warning - Zeros acts a scalar while broadcasting, so does not - expand dims. Checks for shape compatibility by default. - ## Examples ```julia From 894c075b6d5d9a3d3a9c7ae6e7f279a5fc384977 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 9 Dec 2019 21:40:58 +0530 Subject: [PATCH 27/39] rm Zeros setindex --- src/utils.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 7f244724..ed0f95fc 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -193,11 +193,6 @@ Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T) Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = Zeros(T, inds.stop) -Base.setindex(xs::Zeros, args...) = - error("setindex disallowed on Zeros Array") -Base.setindex!(xs::Zeros, args...) = - error("setindex! disallowed on Zeros Array") - Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) @adjoint reshape(xs::Zeros{T}, dims...) 
where T = From a72ca2b05db5bb6e528627121eff832f5fbe64f6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Mon, 9 Dec 2019 23:18:01 +0530 Subject: [PATCH 28/39] fix args --- src/utils.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index ed0f95fc..97bfd3cd 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -231,15 +231,15 @@ for op in (:+, :-) end end -broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(typeof(+), b, a) -broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(typeof(+), -b, a) +broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) +broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) sz .= zero(a) end -broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(typeof(*), b, a) +broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) for op in (:+, :-, :*) @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) From b9fbee1ff024ae9dd66e4b9b4ffebdc42ead1c51 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 31 Jan 2020 12:24:36 +0530 Subject: [PATCH 29/39] ::typeof(op) -> op --- src/utils.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 97bfd3cd..3c8abb5e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -225,34 +225,34 @@ Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...) # Define broadcasting behaviour for op in (:+, :-) - @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) + @eval function broadcasted($op, a::AbstractArray, b::Zeros) sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) sz .= a end end -broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) -broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) +broadcasted(+, a::Zeros, b::AbstractArray) = broadcasted(+, b, a) +broadcasted(-, a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) -function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) +function broadcasted(*, a::AbstractArray, b::Zeros) sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) sz .= zero(a) end -broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) +broadcasted(*, a::Zeros, b::AbstractArray) = broadcasted(*, b, a) for op in (:+, :-, :*) - @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) + @eval broadcasted($op, a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) 
end # Some opportunities to avoid scalar indexing, intermediaries -broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b -broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b -broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) -broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) -broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(+, a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(+, a::Zeros{T,0}, b::AbstractArray) where T = b +broadcasted(-, a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(-, a::Zeros{T,0}, b::AbstractArray) where T = -b +broadcasted(*, a::AbstractArray, b::Zeros{T,0}) where T = zero(a) +broadcasted(*, a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(/, a::Zeros{T,0}, b::AbstractArray) where T = zero(b) """ @jit ... From bc20103ea6dd1034951cd053fe0fc1a68f7b0bcf Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Fri, 31 Jan 2020 13:23:33 +0530 Subject: [PATCH 30/39] no-op copy --- src/utils.jl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 3c8abb5e..36bab5a9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -221,38 +221,38 @@ function *(a::Zeros{T,2}, b::AbstractArray{S,2}) where {T,S} res .= zero(S) end -Base.copy(xs::Zeros{T,N}) where {T,N} = Zeros(T, size(xs)...) +Base.copy(xs::Zeros{T,N}) where {T,N} = xs # Define broadcasting behaviour for op in (:+, :-) - @eval function broadcasted($op, a::AbstractArray, b::Zeros) + @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) sz .= a end end -broadcasted(+, a::Zeros, b::AbstractArray) = broadcasted(+, b, a) -broadcasted(-, a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) +broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) +broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) -function broadcasted(*, a::AbstractArray, b::Zeros) +function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) sz .= zero(a) end -broadcasted(*, a::Zeros, b::AbstractArray) = broadcasted(*, b, a) +broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) for op in (:+, :-, :*) - @eval broadcasted($op, a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) + @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) 
end # Some opportunities to avoid scalar indexing, intermediaries -broadcasted(+, a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(+, a::Zeros{T,0}, b::AbstractArray) where T = b -broadcasted(-, a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(-, a::Zeros{T,0}, b::AbstractArray) where T = -b -broadcasted(*, a::AbstractArray, b::Zeros{T,0}) where T = zero(a) -broadcasted(*, a::Zeros{T,0}, b::AbstractArray) where T = zero(b) -broadcasted(/, a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b +broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b +broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) +broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) """ @jit ... From f889d0c4d4fd03c5209d66f640e05f7c48cbd454 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 26 Feb 2020 22:19:17 +0530 Subject: [PATCH 31/39] add kwarg constructors --- src/layers/conv.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 751689f5..c2cc15bf 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -55,6 +55,11 @@ function Conv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = id return Conv(σ, w, b, stride, pad, dilation) end +function Conv(;weight::AbstractArray, bias::Union{Zeros, AbstractVector{T}}, activation = identity, + stride = 1, pad = 0, dilation = 1) where {T,N} + Conv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) +end + """ convfilter(filter::Tuple, in=>out) @@ -144,6 +149,11 @@ function ConvTranspose(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}} return ConvTranspose(σ, w, b, stride, pad, dilation) end +function ConvTranspose(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}}, + activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N} + ConvTranspose(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) +end + function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; init = glorot_uniform, stride = 1, pad = 0, dilation = 1, weight = convfilter(k, reverse(ch), init = init), bias = zeros(ch[2])) where N @@ -233,6 +243,11 @@ function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}} return DepthwiseConv(σ, w, b, stride, pad, dilation) end +function DepthwiseConv(;weight::AbstractArray, bias::Union{Zeros, AbstractVector{T}}, + activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N} + DepthwiseConv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) +end + """ depthwiseconvfilter(filter::Tuple, in=>out) From 58211e31bd51408a1c80138d8ccbb7d27dbd8117 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 26 Feb 2020 22:22:11 +0530 Subject: [PATCH 32/39] docs improve --- src/layers/conv.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index c2cc15bf..5b19269a 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -107,7 +107,7 @@ end """ ConvTranspose(filter::Tuple, in=>out) - ConvTranspose(filter::Tuple, in=>out, relu) + ConvTranspose(filter::Tuple, in=>out, activation) Standard convolutional transpose layer. 
`filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. @@ -200,7 +200,7 @@ end """ DepthwiseConv(filter::Tuple, in=>out) - DepthwiseConv(filter::Tuple, in=>out, relu) + DepthwiseConv(filter::Tuple, in=>out, activation) Depthwise convolutional layer. `filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. From cf82393ae8aa1e1c44df28777f7a36bc765c3eb6 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 26 Feb 2020 22:36:25 +0530 Subject: [PATCH 33/39] type signatures --- src/layers/conv.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 94a10606..41f0e2e3 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -60,8 +60,8 @@ function Conv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ = id return Conv(σ, w, b, stride, pad, dilation) end -function Conv(;weight::AbstractArray, bias::Union{Zeros, AbstractVector{T}}, activation = identity, - stride = 1, pad = 0, dilation = 1) where {T,N} +function Conv(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}}, + activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N} Conv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) end @@ -268,7 +268,7 @@ function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}} return DepthwiseConv(σ, w, b, stride, pad, dilation) end -function DepthwiseConv(;weight::AbstractArray, bias::Union{Zeros, AbstractVector{T}}, +function DepthwiseConv(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}}, activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N} DepthwiseConv(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) end @@ -379,7 +379,7 @@ function CrossCor(w::AbstractArray{T,N}, b::Union{Zeros, AbstractVector{T}}, σ return CrossCor(σ, w, b, stride, pad, dilation) end -function CrossCor(;weight::AbstractArray, bias::Union{Zeros, AbstractVector{T}}, +function CrossCor(;weight::AbstractArray{T,N}, bias::Union{Zeros, AbstractVector{T}}, activation = identity, stride = 1, pad = 0, dilation = 1) where {T,N} CrossCor(weight, bias, activation, stride = stride, pad = pad, dilation = dilation) end From 20e78e274ecaccc7677373a891ddadcb5777dda7 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 26 Feb 2020 22:41:45 +0530 Subject: [PATCH 34/39] docs fix --- src/layers/conv.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 41f0e2e3..997b96e5 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -7,6 +7,7 @@ _convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+ expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) + """ Conv(filter::Tuple, in=>out) Conv(filter::Tuple, in=>out, activation) @@ -127,7 +128,7 @@ outdims(l::Conv, isize) = """ ConvTranspose(size, in=>out) - ConvTranspose(size, in=>out, relu) + ConvTranspose(size, in=>out, activation) Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)`. `in` and `out` specify the number of input and output channels respectively. 
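
The keyword constructors added above give a direct way to build a layer from pre-existing arrays. A hedged sketch of that usage, with `convfilter`, the `activation` keyword and `Flux.Zeros()` as introduced by these patches, assuming Flux from this branch:

```julia
using Flux

w = Flux.convfilter((3, 3), 1 => 8)                        # 3×3×1×8 filter, glorot_uniform init
layer = Conv(weight = w, bias = Flux.Zeros(), activation = relu)

size(layer(rand(Float32, 28, 28, 1, 1)))                   # (26, 26, 8, 1)
```
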
From 7e308e77fd4b4c60906772b61351d326605ae753 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 4 Mar 2020 17:57:16 +0530 Subject: [PATCH 35/39] rm unneccesary fns --- src/utils.jl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index dbf85c95..6ad410b3 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -256,18 +256,6 @@ end +(a::Zeros, b::AbstractArray) = b + a -(a::Zeros, b::AbstractArray) = -b + a -function *(a::AbstractArray{S,2}, b::Zeros{T,2}) where {T,S} - @assert size(a,2) == size(b,1) throw(DimensionMismatch("A has dimensions $(size(a)) but B has dimensions $(size(b))")) - res = similar(a, size(a,1), size(b,2)) - res .= zero(S) -end - -function *(a::Zeros{T,2}, b::AbstractArray{S,2}) where {T,S} - @assert size(a,2) == size(b,1) throw(DimensionMismatch("A has dimensions $(size(a)) but B has dimensions $(size(b))")) - res = similar(b, size(a,1), size(b,2)) - res .= zero(S) -end - Base.copy(xs::Zeros{T,N}) where {T,N} = xs # Define broadcasting behaviour @@ -282,8 +270,7 @@ broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) - sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) - sz .= zero(a) + Zeros(Broadcast.broadcast_shape(size(a), size(b))...) end broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) From d8e44fcc1c4fe98f4b87668d498575f303813701 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 4 Mar 2020 18:22:45 +0530 Subject: [PATCH 36/39] correct broadcasting for addition --- src/utils.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 6ad410b3..5e8eb270 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -247,7 +247,7 @@ Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) # Define basic ops for f in (:+, :-) - @eval function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) + @eval @inline function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) @assert size(a) == size(b) throw(DimensionMismatch("dimensions must match")) a end @@ -261,7 +261,9 @@ Base.copy(xs::Zeros{T,N}) where {T,N} = xs # Define broadcasting behaviour for op in (:+, :-) @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) - sz = similar(a, Broadcast.broadcast_shape(size(a), size(b))) + bs = Broadcast.broadcast_shape(size(a), size(b)) + size(a) == bs && return a + sz = similar(a, bs) sz .= a end end From 534809ae78b6b09baa60c6d3e2f055d9a47d6db5 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 29 Apr 2020 16:15:35 +0530 Subject: [PATCH 37/39] move zeros to its own file --- src/Flux.jl | 1 + src/utils.jl | 104 --------------------------------------------------- src/zeros.jl | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 104 deletions(-) create mode 100644 src/zeros.jl diff --git a/src/Flux.jl b/src/Flux.jl index 5799fe42..90dcb630 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -27,6 +27,7 @@ using CuArrays const use_cuda = Ref(false) include("utils.jl") +include("zeros.jl") include("onehot.jl") include("functor.jl") diff --git a/src/utils.jl b/src/utils.jl index c321bd91..7842c961 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -321,110 +321,6 @@ function throttle(f, timeout; leading=true, trailing=false) end end -import Base: +, -, *, reshape, size -import Base.Broadcast: broadcasted, Broadcasted, 
BroadcastStyle - -""" - Zeros() - Zeros(size...) - Zeros(Type, size...) - -Acts as a stand-in for an array of zeros that can be -used during training which is ignored by the optimisers. - -Useful to turn bias off for a forward pass of a layer. - -## Examples - -```julia -julia> Flux.Zeros(3,3) -3×3 Flux.Zeros{Bool,2}: - false false false - false false false - false false false - -julia> Flux.Zeros(Float32, 3,3) -3×3 Flux.Zeros{Float32,2}: - 0.0 0.0 0.0 - 0.0 0.0 0.0 - 0.0 0.0 0.0 - -julia> rand(3,3) .+ Flux.Zeros() -3×3 Array{Float64,2}: - 0.198739 0.490459 0.785386 - 0.779074 0.39986 0.66383 - 0.854981 0.447292 0.314497 - -julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros()) -Conv((2, 2), 1=>3) -``` -""" -struct Zeros{T,N} <: AbstractArray{T,N} - size::Tuple -end - -Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz) -Zeros(sz::Integer...) = Zeros(Bool, sz...) - -Base.size(xs::Zeros) = xs.size -Base.axes(xs::Zeros) = Base.OneTo.(size(xs)) - -Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() - -Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T) -Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = - Zeros(T, inds.stop) - -Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) - -@adjoint reshape(xs::Zeros{T}, dims...) where T = - reshape(xs, dims...), _ -> nothing - -# Define basic ops -for f in (:+, :-) - @eval @inline function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) - @assert size(a) == size(b) throw(DimensionMismatch("dimensions must match")) - a - end -end - -+(a::Zeros, b::AbstractArray) = b + a --(a::Zeros, b::AbstractArray) = -b + a - -Base.copy(xs::Zeros{T,N}) where {T,N} = xs - -# Define broadcasting behaviour -for op in (:+, :-) - @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) - bs = Broadcast.broadcast_shape(size(a), size(b)) - size(a) == bs && return a - sz = similar(a, bs) - sz .= a - end -end - -broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) -broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) - -function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) - Zeros(Broadcast.broadcast_shape(size(a), size(b))...) -end - -broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) - -for op in (:+, :-, :*) - @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) -end - -# Some opportunities to avoid scalar indexing, intermediaries -broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b -broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a -broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b -broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) -broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) -broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) - """ @jit ... diff --git a/src/zeros.jl b/src/zeros.jl new file mode 100644 index 00000000..d281d3eb --- /dev/null +++ b/src/zeros.jl @@ -0,0 +1,103 @@ +import Base: +, -, *, reshape, size +import Base.Broadcast: broadcasted, Broadcasted, BroadcastStyle + +""" + Zeros() + Zeros(size...) + Zeros(Type, size...) + +Acts as a stand-in for an array of zeros that can be +used during training which is ignored by the optimisers. + +Useful to turn bias off for a forward pass of a layer. 
+ +## Examples + +```julia +julia> Flux.Zeros(3,3) +3×3 Flux.Zeros{Bool,2}: + false false false + false false false + false false false + +julia> Flux.Zeros(Float32, 3,3) +3×3 Flux.Zeros{Float32,2}: + 0.0 0.0 0.0 + 0.0 0.0 0.0 + 0.0 0.0 0.0 + +julia> rand(3,3) .+ Flux.Zeros() +3×3 Array{Float64,2}: + 0.198739 0.490459 0.785386 + 0.779074 0.39986 0.66383 + 0.854981 0.447292 0.314497 + +julia> bias_less_conv = Conv((2,2), 1=>3, bias = Flux.Zeros()) +Conv((2, 2), 1=>3) +``` +""" +struct Zeros{T,N} <: AbstractArray{T,N} + size::Tuple +end + +Zeros(::Type{T}, sz...) where T = Zeros{T,length(sz)}(sz) +Zeros(sz::Integer...) = Zeros(Bool, sz...) + +Base.size(xs::Zeros) = xs.size +Base.axes(xs::Zeros) = Base.OneTo.(size(xs)) + +Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() + +Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T) +Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = + Zeros(T, inds.stop) + +Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) + +@adjoint reshape(xs::Zeros{T}, dims...) where T = + reshape(xs, dims...), _ -> nothing + +# Define basic ops +for f in (:+, :-) + @eval @inline function $f(a::Union{AbstractArray{<:Number}, Zeros}, b::Zeros) + @assert size(a) == size(b) throw(DimensionMismatch("dimensions must match")) + a + end +end + ++(a::Zeros, b::AbstractArray) = b + a +-(a::Zeros, b::AbstractArray) = -b + a + +Base.copy(xs::Zeros{T,N}) where {T,N} = xs + +# Define broadcasting behaviour +for op in (:+, :-) + @eval function broadcasted(::typeof($op), a::AbstractArray, b::Zeros) + bs = Broadcast.broadcast_shape(size(a), size(b)) + size(a) == bs && return a + sz = similar(a, bs) + sz .= a + end +end + +broadcasted(::typeof(+), a::Zeros, b::AbstractArray) = broadcasted(+, b, a) +broadcasted(::typeof(-), a::Zeros, b::AbstractArray) = broadcasted(+, -b, a) + +function broadcasted(::typeof(*), a::AbstractArray, b::Zeros) + Zeros(Broadcast.broadcast_shape(size(a), size(b))...) +end + +broadcasted(::typeof(*), a::Zeros, b::AbstractArray) = broadcasted(*, b, a) + +for op in (:+, :-, :*) + @eval broadcasted(::typeof($op), a::Zeros, b::Zeros) = Zeros(Broadcast.broadcast_shape(size(a), size(b))...) +end + +# Some opportunities to avoid scalar indexing, intermediaries +broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b +broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a +broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b +broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) +broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) +broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) \ No newline at end of file From 29215fa5d7c8d82c5d8d19cb3cb7fafc9ef34324 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Wed, 29 Apr 2020 16:17:44 +0530 Subject: [PATCH 38/39] comment on possible future deprecations --- src/zeros.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zeros.jl b/src/zeros.jl index d281d3eb..d31adcd9 100644 --- a/src/zeros.jl +++ b/src/zeros.jl @@ -94,6 +94,9 @@ for op in (:+, :-, :*) end # Some opportunities to avoid scalar indexing, intermediaries +# Since it replicates a little of what we expect Base to do, +# it should be possible to remove in the future, but for now, +# these help with performance. 
broadcasted(::typeof(+), a::AbstractArray, b::Zeros{T,0}) where T = a broadcasted(::typeof(+), a::Zeros{T,0}, b::AbstractArray) where T = b broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a From 8f877f2dbfd4937f52615a0b798a0d582c456da1 Mon Sep 17 00:00:00 2001 From: Mike J Innes Date: Fri, 1 May 2020 14:22:46 +0100 Subject: [PATCH 39/39] quick fix --- src/zeros.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zeros.jl b/src/zeros.jl index d31adcd9..1aec7b02 100644 --- a/src/zeros.jl +++ b/src/zeros.jl @@ -50,7 +50,7 @@ Base.IndexStyle(::Type{<:Zeros}) = IndexLinear() Base.getindex(xs::Zeros{T,N}, I::Int) where {T,N} = zero(T) Base.getindex(xs::Zeros{T,N}, inds::Union{Base.OneTo, Base.UnitRange}) where {T,N} = - Zeros(T, inds.stop) + Zeros(T, length(inds)) Base.collect(xs::Zeros{T,N}) where {T,N} = fill(zero(T), size(xs)) @@ -103,4 +103,4 @@ broadcasted(::typeof(-), a::AbstractArray, b::Zeros{T,0}) where T = a broadcasted(::typeof(-), a::Zeros{T,0}, b::AbstractArray) where T = -b broadcasted(::typeof(*), a::AbstractArray, b::Zeros{T,0}) where T = zero(a) broadcasted(::typeof(*), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) -broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b) \ No newline at end of file +broadcasted(::typeof(/), a::Zeros{T,0}, b::AbstractArray) where T = zero(b)
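
Taken together, the series leaves `Zeros` as an array-like placeholder for a switched-off bias, and such a bias collects no gradient. A closing usage sketch mirroring the gradient test added earlier in the series, assuming Flux from this branch:

```julia
using Flux

m = Conv((2, 2), 1 => 3, bias = Flux.Zeros())
x = rand(Float32, 28, 28, 1, 1)

gs = gradient(() -> sum(m(x)), Flux.params(m))
gs[m.bias] == nothing   # the Zeros bias has no gradient, so optimisers never update it
```
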