diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 26a34306..a427c143 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -92,20 +92,21 @@ struct ConvTranspose{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function ConvTranspose(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return ConvTranspose(σ, w, b, stride, pad, dilation) + return ConvTranspose(σ, w, b, stride, pad, dilation, use_bias) end ConvTranspose(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = ConvTranspose(init(k..., reverse(ch)...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) @functor ConvTranspose @@ -125,9 +126,13 @@ end function (c::ConvTranspose)(x::AbstractArray) # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = conv_transpose_dims(c, x) - return σ.(∇conv_data(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(∇conv_data(x, c.weight, cdims) .+ b) + else + c.σ.(∇conv_data(x, c.weight, cdims)) + end end function Base.show(io::IO, l::ConvTranspose) @@ -162,18 +167,19 @@ struct DepthwiseConv{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function DepthwiseConv(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return DepthwiseConv(σ, w, b, stride, pad, dilation) + return DepthwiseConv(σ, w, b, stride, pad, dilation, use_bias) end function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" return DepthwiseConv( init(k..., div(ch[2], ch[1]), ch[1]), @@ -181,16 +187,21 @@ function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = σ; stride = stride, pad = pad, - dilation = dilation + dilation = dilation, + use_bias = use_bias ) end @functor DepthwiseConv function (c::DepthwiseConv)(x) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(depthwiseconv(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(depthwiseconv(x, c.weight, cdims) .+ b) + else + c.σ.(depthwiseconv(x, c.weight, cdims)) + end end function Base.show(io::IO, l::DepthwiseConv) @@ -234,20 +245,21 @@ struct CrossCor{N,M,F,A,V} stride::NTuple{N,Int} pad::NTuple{M,Int} dilation::NTuple{N,Int} + use_bias::Bool end function CrossCor(w::AbstractArray{T,N}, b::AbstractVector{T}, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} + stride = 1, pad = 0, dilation = 1, use_bias = true) where {T,N} stride = expand(Val(N-2), stride) pad = expand(Val(2*(N-2)), pad) dilation = expand(Val(N-2), dilation) - return CrossCor(σ, w, b, stride, pad, dilation) + return CrossCor(σ, w, b, stride, pad, dilation, use_bias) end CrossCor(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1) where N = + init = glorot_uniform, stride = 1, pad = 0, dilation = 1, use_bias = true) where N = CrossCor(init(k..., ch...), zeros(ch[2]), σ, - stride = stride, pad = pad, dilation = dilation) + stride = stride, pad = pad, dilation = dilation, use_bias = use_bias) @functor CrossCor @@ -259,9 +271,13 @@ end function (c::CrossCor)(x::AbstractArray) # TODO: breaks gpu broadcast :( # ndims(x) == ndims(c.weight)-1 && return squeezebatch(c(reshape(x, size(x)..., 1))) - σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(crosscor(x, c.weight, cdims) .+ b) + if c.use_bias + σ, b = c.σ, reshape(c.bias, map(_->1, c.stride)..., :, 1) + σ.(crosscor(x, c.weight, cdims) .+ b) + else + c.σ.(crosscor(x, c.weight, cdims)) + end end function Base.show(io::IO, l::CrossCor)