add bias and weight kwarg

Dhairya Gandhi 2019-10-08 17:18:19 +05:30
parent f3904b4e04
commit 040697fb2b
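
In short: each convolution-style layer (Conv, ConvTranspose, DepthwiseConv, CrossCor) gains `weight` and `bias` keyword arguments, and the positional `(weight, bias, σ)` constructors now also accept `nothing` as the bias, stored as a ZeroType placeholder. A minimal usage sketch, not part of the diff, with array shapes inferred from the constructors below:

    using Flux

    # Supply custom weight and bias arrays via the new keywords.
    w = randn(Float32, 3, 3, 1, 16)   # (filter..., in, out)
    b = zeros(Float32, 16)            # one bias per output channel
    layer = Conv((3, 3), 1 => 16, relu, weight = w, bias = b)

    x = rand(Float32, 28, 28, 1, 1)   # WHCN: width, height, channels, batch
    size(layer(x))                    # (26, 26, 16, 1)

    # `bias = nothing` requests a bias-free layer.
    nobias = Conv((3, 3), 1 => 16, relu, weight = w, bias = nothing)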


@@ -3,16 +3,16 @@ using NNlib: conv, ∇conv_data, depthwiseconv
 expand(N, i::Tuple) = i
 expand(N, i::Integer) = ntuple(_ -> i, N)
 
 """
-    Conv(size, in=>out)
-    Conv(size, in=>out, relu)
+    Conv(filter::Tuple, in=>out)
+    Conv(filter::Tuple, in=>out, activation)
 
-Standard convolutional layer. `size` should be a tuple like `(2, 2)`.
+Standard convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Example: Applying Conv layer to a 1-channel input using a 2x2 window size,
 giving us a 16-channel output. Output is activated with ReLU.
 
-    size = (2,2)
+    filter = (2,2)
     in = 1
     out = 16
     Conv((2, 2), 1=>16, relu)
@@ -34,7 +34,7 @@ end
 
 """
     Conv(weight::AbstractArray, bias::AbstractArray)
-    Conv(weight::AbstractArray, bias::AbstractArray, relu)
+    Conv(weight::AbstractArray, bias::AbstractArray, activation)
 
 Constructs the convolutional layer with user defined weight and bias arrays.
 All other behaviours of the Conv layer apply with regard to data order and
@@ -42,21 +42,32 @@ forward pass.
 
 Takes the keyword arguments `pad`, `stride` and `dilation`.
 """
-function Conv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+function Conv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
               stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return Conv(σ, w, b, stride, pad, dilation)
 end
 
-convweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
-           init = glorot_uniform) where N = init(k..., ch...)
-
-const convbias = zeros
+"""
+    convweight(filter::Tuple, in=>out)
+
+Constructs a standard convolutional weight matrix with given `filter` and
+channels from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`depthwiseconvweight`](@ref)
+"""
+convweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+           init = glorot_uniform) where N = init(filter..., ch...)
 
 function Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
               init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-              weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N
+              weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N
   Conv(weight, bias, σ,
        stride = stride, pad = pad, dilation = dilation)
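
For the hunk above, a sketch of the resulting Conv API. It assumes the unexported helper is reached as `Flux.convweight` and that `ZeroType` behaves as an all-zeros bias; neither is spelled out in the diff itself:

    # Build the default weight array explicitly, tweak it, and pass it back
    # through the positional constructor together with a matching bias.
    w = Flux.convweight((5, 5), 3 => 8)       # (5, 5, 3, 8), glorot_uniform init
    c = Conv(0.1f0 .* w, zeros(Float32, 8), relu, pad = 2)
    size(c(rand(Float32, 32, 32, 3, 1)))      # (32, 32, 8, 1), since pad = 2

    # Passing `nothing` skips the bias: it is stored as ZeroType((8,)).
    c0 = Conv(w, nothing)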
@@ -86,10 +97,10 @@ end
   a(T.(x))
 
 """
-    ConvTranspose(size, in=>out)
-    ConvTranspose(size, in=>out, relu)
+    ConvTranspose(filter::Tuple, in=>out)
+    ConvTranspose(filter::Tuple, in=>out, relu)
 
-Standard convolutional transpose layer. `size` should be a tuple like `(2, 2)`.
+Standard convolutional transpose layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 
 Data should be stored in WHCN order. In other words, a 100×100 RGB image would
@@ -106,17 +117,28 @@ struct ConvTranspose{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function ConvTranspose(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray)
+    ConvTranspose(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the convolutional transpose layer with user defined weight and bias arrays.
+All other behaviours of the ConvTranspose layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function ConvTranspose(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return ConvTranspose(σ, w, b, stride, pad, dilation)
 end
 
 function ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                        init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                       weight = convweight(k, reverse(ch), init = init), bias = convbias(ch[2])) where N
+                       weight = convweight(k, reverse(ch), init = init), bias = zeros(ch[2])) where N
   ConvTranspose(weight, bias, σ,
                 stride = stride, pad = pad, dilation = dilation)
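
Note the `reverse(ch)` in the default weight above: a user-supplied ConvTranspose weight should be laid out as `(filter..., out, in)`, the opposite of Conv. A sketch under that assumption:

    w = randn(Float32, 2, 2, 16, 1)   # (filter..., out, in), channels reversed
    ct = ConvTranspose(w, zeros(Float32, 16), relu, stride = 2)

    x = rand(Float32, 14, 14, 1, 1)
    size(ct(x))                        # (28, 28, 16, 1): (14 - 1) * 2 + 2 = 28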
@@ -157,11 +179,12 @@ end
 (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} =
   a(T.(x))
 
 """
-    DepthwiseConv(size, in=>out)
-    DepthwiseConv(size, in=>out, relu)
+    DepthwiseConv(filter::Tuple, in=>out)
+    DepthwiseConv(filter::Tuple, in=>out, relu)
 
-Depthwise convolutional layer. `size` should be a tuple like `(2, 2)`.
+Depthwise convolutional layer. `filter` should be a tuple like `(2, 2)`.
 `in` and `out` specify the number of input and output channels respectively.
 Note that `out` must be an integer multiple of `in`.
@@ -179,21 +202,44 @@ struct DepthwiseConv{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray)
+    DepthwiseConv(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the `DepthwiseConv` layer with user defined weight and bias arrays.
+All other behaviours of the `DepthwiseConv` layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function DepthwiseConv(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                        stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return DepthwiseConv(σ, w, b, stride, pad, dilation)
 end
 
-depthwiseconvweight(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
-                    init = glorot_uniform) where N = init(k..., div(ch[2], ch[1]), ch[1])
+"""
+    depthwiseconvweight(filter::Tuple, in=>out)
+
+Constructs a depthwise convolutional weight array defined by `filter` and channels
+from `in` to `out`.
+
+Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
+distribution.
+
+See also: [`convweight`](@ref)
+"""
+depthwiseconvweight(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
+                    init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1])
 
 function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                        init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                       weight = depthwiseconvweight(k, ch, init = init), bias = convbias(ch[2])) where N
+                       weight = depthwiseconvweight(k, ch, init = init), bias = zeros(ch[2])) where N
   @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels"
   return DepthwiseConv(
     weight,
     bias,
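
The depthwise weight groups `div(out, in)` filters per input channel, which is why the constructor asserts that `out` is a multiple of `in`. A sketch, again assuming the unexported helper is reached as `Flux.depthwiseconvweight`:

    w = Flux.depthwiseconvweight((3, 3), 3 => 6)   # size (3, 3, 2, 3)
    d = DepthwiseConv(w, zeros(Float32, 6), relu, pad = 1)

    size(d(rand(Float32, 28, 28, 3, 1)))           # (28, 28, 6, 1)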
@@ -255,17 +301,29 @@ struct CrossCor{N,M,F,A,V}
   dilation::NTuple{N,Int}
 end
 
-function CrossCor(w::AbstractArray{T,N}, b::Union{Number, AbstractVector{T}}, σ = identity;
+"""
+    CrossCor(weight::AbstractArray, bias::AbstractArray)
+    CrossCor(weight::AbstractArray, bias::AbstractArray, activation)
+
+Constructs the standard cross convolutional layer with user defined weight and bias
+arrays. All other behaviours of the CrossCor layer apply with regard to data order and
+forward pass.
+
+Takes the keyword arguments `pad`, `stride` and `dilation`.
+"""
+function CrossCor(w::AbstractArray{T,N}, b::Union{Nothing, ZeroType, AbstractVector{T}}, σ = identity;
                   stride = 1, pad = 0, dilation = 1) where {T,N}
   stride = expand(Val(N-2), stride)
   pad = expand(Val(2*(N-2)), pad)
   dilation = expand(Val(N-2), dilation)
+  b = b isa Nothing ? ZeroType((size(w, ndims(w)), )) : b
   return CrossCor(σ, w, b, stride, pad, dilation)
 end
 
 function CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
                   init = glorot_uniform, stride = 1, pad = 0, dilation = 1,
-                  weight = convweight(k, ch, init = init), bias = convbias(ch[2])) where N
+                  weight = convweight(k, ch, init = init), bias = zeros(ch[2])) where N
   CrossCor(weight, bias, σ,
            stride = stride, pad = pad, dilation = dilation)
 end
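
CrossCor reuses `convweight`, so its custom arrays follow the same `(filter..., in, out)` layout as Conv; only the forward pass (no kernel flip) differs. A final sketch of the keyword form, with the same caveats as the Conv example above:

    w = Flux.convweight((3, 3), 1 => 4)
    cc = CrossCor(w, zeros(Float32, 4), relu)
    size(cc(rand(Float32, 10, 10, 1, 1)))   # (8, 8, 4, 1)

    # As with the other layers, `bias = nothing` is now accepted.
    cc0 = CrossCor((3, 3), 1 => 4, bias = nothing)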