From 040697fb2bee3987f40bd5e8d3c3b6a815cbcfcf Mon Sep 17 00:00:00 2001
From: Dhairya Gandhi
Date: Tue, 8 Oct 2019 17:18:19 +0530
Subject: [PATCH] add bias and weight kwarg

---
 src/layers/conv.jl | 108 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 83 insertions(+), 25 deletions(-)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index c7bda4ab..5dcd400c 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -3,16 +3,16 @@ using NNlib: conv, ∇conv_data, depthwiseconv
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
 """
-    Conv(size, in=>out)
-    Conv(size, in=>out, relu)
+    Conv(filter::Tuple, in=>out)
+    Conv(filter::Tuple, in=>out, activation)
 
-Standard convolutional layer. `size` should be a tuple like `(2, 2)`.
+Standard convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Example: Applying Conv layer to a 1-channel input using a 2x2 window size,
 giving us a 16-channel output. Output is activated with ReLU.
 
-    size = (2,2)
+    filter = (2,2)
     in = 1
     out = 16
     Conv((2, 2), 1=>16, relu)
@@ -34,7 +34,7 @@ end
 
 """
     Conv(weight::AbstractArray, bias::AbstractArray)
-    Conv(weight::AbstractArray, bias::AbstractArray, relu)
+    Conv(weight::AbstractArray, bias::AbstractArray, activation)
 
 Constructs the convolutional layer with user defined weight and bias arrays.
 All other behaviours of the Conv layer apply with regard to data order and
@@ -42,21 +42,32 @@ forward pass.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function Conv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return Conv(σ, w, b, stride, pad, dilation)
 end
 
-convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
-  init = glorot_uniform) where N = init(k..., ch...)
-const convbias = zeros
+"""
+    convweight(filter::Tuple, in=>out)
+
+Constructs a standard convolutional weight array with the given `filter` size and
+channels from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`depthwiseconvweight`](@ref)
+"""
+convweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+  init = glorot_uniform) where N = init(filter..., ch...)
 
 function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
               init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-              weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N
+              weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N
 
   Conv(weight, bias, σ,
        stride = stride, pad = pad, dilation = dilation)
@@ -86,10 +97,10 @@ end
   a(T.(x))
 
 """
-    ConvTranspose(size, in=>out)
-    ConvTranspose(size, in=>out, relu)
+    ConvTranspose(filter::Tuple, in=>out)
+    ConvTranspose(filter::Tuple, in=>out, activation)
 
-Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
+Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order. In other words, a 100×100 RGB image would
@@ -106,17 +117,28 @@ struct ConvTranspose{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function ConvTranspose(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray)
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the convolutional transpose layer with user defined weight and bias arrays.
+All other behaviours of the ConvTranspose layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
 
 function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                        init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                       weight = convweight(k, reverse(ch), init = init), bias = convbias(ch[2])) where N
+                       weight = convweight(k, reverse(ch), init = init), bias = zeros(ch[2])) where N
 
   ConvTranspose(weight, bias, σ,
                 stride = stride, pad = pad, dilation = dilation)
@@ -157,11 +179,12 @@ end
 (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))
 
-"""
-    DepthwiseConv(size, in=>out)
-    DepthwiseConv(size, in=>out, relu)
-Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
+"""
+    DepthwiseConv(filter::Tuple, in=>out)
+    DepthwiseConv(filter::Tuple, in=>out, activation)
+
+Depthwise convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 Note that `out` must be an integer multiple of `in`.
@@ -179,21 +202,44 @@ struct DepthwiseConv{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray)
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
+All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
 
-depthwiseconvweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
-  init = glorot_uniform) where N = init(k..., div(ch[2], ch[1]), ch[1])
+"""
+    depthwiseconvweight(filter::Tuple, in=>out)
+
+Constructs a depthwise convolutional weight array defined by `filter` and channels
+from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`convweight`](@ref)
+"""
+depthwiseconvweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+  init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1])
 
 function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                        init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                       weight = depthwiseconvweight(k, ch, init = init), bias = convbias(ch[2])) where N
+                       weight = depthwiseconvweight(k, ch, init = init), bias = zeros(ch[2])) where N
   @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
+
   return DepthwiseConv(
     weight,
     bias,
@@ -255,17 +301,29 @@ struct CrossCor{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function CrossCor(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    CrossCor(weight::AbstractArray, bias::AbstractArray)
+    CrossCor(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the standard cross convolutional layer with user defined weight and bias
+arrays. All other behaviours of the CrossCor layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
 
 function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                   init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                  weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N
+                  weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N
+
   CrossCor(weight, bias, σ,
            stride = stride, pad = pad, dilation = dilation)
 end
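
A quick usage sketch for reviewers, illustrating the surface this patch adds.
This note is an editor's illustration, not part of the diff; it assumes the
patched build of Flux and uses only names introduced above (`convweight` is
unexported, and `ZeroType` is the patch's stand-in for an absent bias).

    using Flux

    # Keyword construction as before: weight and bias are generated
    # internally when not supplied (glorot_uniform weights, zero bias).
    c1 = Conv((2, 2), 1=>16, relu)

    # New `weight`/`bias` kwargs: supply the parameter arrays directly.
    w = Flux.convweight((2, 2), 1=>16)   # helper defined in this patch
    b = zeros(16)
    c2 = Conv((2, 2), 1=>16, relu, weight = w, bias = b)

    # Raw-array constructor, and `nothing` as bias, which the patched
    # constructor replaces with a ZeroType of matching length, giving
    # an effectively bias-free layer.
    c3 = Conv(w, b, relu, stride = 1, pad = 0)
    c4 = Conv(w, nothing, relu)

    x = rand(28, 28, 1, 1)   # dummy input in WHCN order: 28×28, 1 channel, batch of 1
    size(c2(x))              # (27, 27, 16, 1): 2×2 filter, stride 1, no padding

The same pattern should carry over to `ConvTranspose`, `DepthwiseConv` and
`CrossCor`, since each gains the same raw-array constructor and the same
`weight`/`bias` keyword arguments.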