From b4ed16ad9cd52905a94ea18b70148724998742ab Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 3 Dec 2019 22:48:48 -0600 Subject: [PATCH 01/23] Added outdims for some basic layers --- src/layers/basic.jl | 35 +++++++++++++++++++++++++++++++ src/layers/conv.jl | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 2a465208..f2e7645d 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -39,6 +39,17 @@ function Base.show(io::IO, c::Chain) print(io, ")") end +""" + outdims(c::Chain, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) +outdims(m, (10, 10)) == (6, 6) +``` +""" +outdims(c::Chain, isize::Tuple) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers)) # This is a temporary and naive implementation # it might be replaced in the future for better performance @@ -116,6 +127,19 @@ end (a::Dense{<:Any,W})(x::AbstractArray{<:AbstractFloat}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) +""" + outdims(l::Dense, isize) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = Dense(10, 5) +outdims(m, (5, 2)) == (5,) +outdims(m, (10,)) == (5,) +``` +""" +outdims(l::Dense, isize) = (size(l.W)[2],) + """ Diagonal(in::Integer) @@ -145,6 +169,17 @@ function Base.show(io::IO, l::Diagonal) print(io, "Diagonal(", length(l.α), ")") end +""" + outdims(l::Diagonal, isize) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = Diagonal(10) +outdims(m, (10,)) == (10,) +``` +""" +outdims(l::Diagonal, isize) = (length(l.α),) """ Maxout(over) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f4de3ffc..eeeea82b 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,5 +1,7 @@ using NNlib: conv, ∇conv_data, depthwiseconv +_convoutdims(isize, ksize, ssize, pad) = Int.(floor.((isize .- ksize .+ 2 .* pad) ./ ssize .+ 1)) + expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) """ @@ -68,6 +70,18 @@ end (a::Conv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) +""" + outdims(l::Conv, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = Conv((3, 3), 3 => 16) +outdims(m, (10, 10)) == (8, 8) +``` +""" +outdims(l::Conv{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) + """ ConvTranspose(size, in=>out) ConvTranspose(size, in=>out, relu) @@ -140,6 +154,7 @@ end (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) + """ DepthwiseConv(size, in=>out) DepthwiseConv(size, in=>out, relu) @@ -204,6 +219,18 @@ end (a::DepthwiseConv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) +""" + outdims(l::DepthwiseConv, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. 
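+
+As a sketch, `_convoutdims` applies the usual convolution output-size
+arithmetic to each spatial dimension (the one-dimensional helper below is
+hypothetical, for illustration only):
+
+```julia
+convdim(i, k, s, p) = floor(Int, (i - k + 2p) / s) + 1
+convdim(10, 3, 1, 0) == 8
+```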
+ +```julia +m = DepthwiseConv((3, 3), 3 => 16) +outdims(m, (10, 10)) == (8, 8) +``` +""" +outdims(l::DepthwiseConv{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) + """ CrossCor(size, in=>out) CrossCor(size, in=>out, relu) @@ -304,6 +331,18 @@ function Base.show(io::IO, m::MaxPool) print(io, "MaxPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end +""" + outdims(l::MaxPool, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = MaxPool((2, 2)) +outdims(m, (10, 10)) == (5, 5) +``` +""" +outdims(l::MaxPool{N}, isize) where N = _convoutdims(isize, l.weight, l.stride, l.pad[1:N]) + """ MeanPool(k) @@ -331,3 +370,15 @@ end function Base.show(io::IO, m::MeanPool) print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end + +""" + outdims(l::MeanPool, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = MeanPool((2, 2)) +outdims(m, (10, 10)) == (5, 5) +``` +""" +outdims(l::MeanPool{N}, isize) where N = _convoutdims(isize, l.weight, l.stride, l.pad[1:N]) \ No newline at end of file From 31dda0ce6cd8c264d083d453823f4f13fa755da5 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Thu, 5 Dec 2019 21:57:10 -0600 Subject: [PATCH 02/23] Updated with all basic and conv layers outdims --- src/layers/basic.jl | 16 ++++++++++++++-- src/layers/conv.jl | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index f2e7645d..8794b58c 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -40,7 +40,7 @@ function Base.show(io::IO, c::Chain) end """ - outdims(c::Chain, isize::Tuple) + outdims(c::Chain, isize) Calculate the output dimensions given the input dimensions, `isize`. @@ -49,7 +49,7 @@ m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) outdims(m, (10, 10)) == (6, 6) ``` """ -outdims(c::Chain, isize::Tuple) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers)) +outdims(c::Chain, isize) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers)) # This is a temporary and naive implementation # it might be replaced in the future for better performance @@ -228,6 +228,18 @@ function (mo::Maxout)(input::AbstractArray) mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over) end +""" + outdims(c::Maxout, isize) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = Maxout(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) +outdims(m, (10, 10)) == (8, 8) +``` +""" +outdims(l::Maxout, isize) = outdims(first(l.over)) + """ SkipConnection(layers, connection) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index eeeea82b..2e3e87d7 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,6 +1,7 @@ using NNlib: conv, ∇conv_data, depthwiseconv _convoutdims(isize, ksize, ssize, pad) = Int.(floor.((isize .- ksize .+ 2 .* pad) ./ ssize .+ 1)) +_convtransoutdims(isize, ksize, ssize, pad) = Int.(ssize .* (isize .- 1) .+ ksize .- 2 .* pad)) expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) @@ -155,6 +156,18 @@ end (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) +""" + outdims(l::ConvTranspose, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. 
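+
+As a sketch, `_convtransoutdims` inverts the forward convolution arithmetic
+(hypothetical helper name, for illustration only):
+
+```julia
+convtransdim(i, k, s, p) = s * (i - 1) + k - 2p
+convtransdim(8, 3, 1, 0) == 10
+```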
+ +```julia +m = ConvTranspose((3, 3), 3 => 16) +outdims(m, (8, 8)) == (10, 10) +``` +""" +outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) + """ DepthwiseConv(size, in=>out) DepthwiseConv(size, in=>out, relu) @@ -302,6 +315,18 @@ end (a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) +""" + outdims(l::CrossCor, isize::Tuple) + +Calculate the output dimensions given the input dimensions, `isize`. + +```julia +m = CrossCor((3, 3), 3 => 16) +outdims(m, (10, 10)) == (8, 8) +``` +""" +outdims(l::CrossCor{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) + """ MaxPool(k) From 6265b1fa39c5d7d289ccd5a00c94ae9f448377fc Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Thu, 5 Dec 2019 22:54:25 -0600 Subject: [PATCH 03/23] Added tests for outdims --- src/layers/basic.jl | 8 ++++---- src/layers/conv.jl | 8 ++++---- test/layers/basic.jl | 15 +++++++++++++++ test/layers/conv.jl | 20 ++++++++++++++++++++ 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index 8794b58c..b62d8bb9 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -49,7 +49,7 @@ m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) outdims(m, (10, 10)) == (6, 6) ``` """ -outdims(c::Chain, isize) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers)) +outdims(c::Chain, isize) = foldl(∘, map(l -> (x -> outdims(l, x)), c.layers))(isize) # This is a temporary and naive implementation # it might be replaced in the future for better performance @@ -138,7 +138,7 @@ outdims(m, (5, 2)) == (5,) outdims(m, (10,)) == (5,) ``` """ -outdims(l::Dense, isize) = (size(l.W)[2],) +outdims(l::Dense, isize) = (size(l.W)[1],) """ Diagonal(in::Integer) @@ -234,11 +234,11 @@ end Calculate the output dimensions given the input dimensions, `isize`. ```julia -m = Maxout(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) +m = Maxout(() -> Conv((3, 3), 3 => 16), 2) outdims(m, (10, 10)) == (8, 8) ``` """ -outdims(l::Maxout, isize) = outdims(first(l.over)) +outdims(l::Maxout, isize) = outdims(first(l.over), isize) """ SkipConnection(layers, connection) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 2e3e87d7..6ce9bcbf 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,7 +1,7 @@ using NNlib: conv, ∇conv_data, depthwiseconv _convoutdims(isize, ksize, ssize, pad) = Int.(floor.((isize .- ksize .+ 2 .* pad) ./ ssize .+ 1)) -_convtransoutdims(isize, ksize, ssize, pad) = Int.(ssize .* (isize .- 1) .+ ksize .- 2 .* pad)) +_convtransoutdims(isize, ksize, ssize, pad) = Int.(ssize .* (isize .- 1) .+ ksize .- 2 .* pad) expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) @@ -238,7 +238,7 @@ end Calculate the output dimensions given the input dimensions, `isize`. 
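+Note that `DepthwiseConv` requires the output channels to be an integer
+multiple of the input channels, so `3 => 6` is valid while `3 => 16` is not.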
```julia -m = DepthwiseConv((3, 3), 3 => 16) +m = DepthwiseConv((3, 3), 3 => 6) outdims(m, (10, 10)) == (8, 8) ``` """ @@ -366,7 +366,7 @@ m = MaxPool((2, 2)) outdims(m, (10, 10)) == (5, 5) ``` """ -outdims(l::MaxPool{N}, isize) where N = _convoutdims(isize, l.weight, l.stride, l.pad[1:N]) +outdims(l::MaxPool{N}, isize) where N = _convoutdims(isize, l.k, l.stride, l.pad[1:N]) """ MeanPool(k) @@ -406,4 +406,4 @@ m = MeanPool((2, 2)) outdims(m, (10, 10)) == (5, 5) ``` """ -outdims(l::MeanPool{N}, isize) where N = _convoutdims(isize, l.weight, l.stride, l.pad[1:N]) \ No newline at end of file +outdims(l::MeanPool{N}, isize) where N = _convoutdims(isize, l.k, l.stride, l.pad[1:N]) \ No newline at end of file diff --git a/test/layers/basic.jl b/test/layers/basic.jl index 0ff1776d..421c7721 100644 --- a/test/layers/basic.jl +++ b/test/layers/basic.jl @@ -92,4 +92,19 @@ import Flux: activations @test size(SkipConnection(Dense(10,10), (a,b) -> cat(a, b, dims = 2))(input)) == (10,4) end end + + @testset "output dimensions" begin + m = Chain(Conv((3, 3), 3 => 16), Conv((3, 3), 16 => 32)) + @test Flux.outdims(m, (10, 10)) == (6, 6) + + m = Dense(10, 5) + @test Flux.outdims(m, (5, 2)) == (5,) + @test Flux.outdims(m, (10,)) == (5,) + + m = Flux.Diagonal(10) + @test Flux.outdims(m, (10,)) == (10,) + + m = Maxout(() -> Conv((3, 3), 3 => 16), 2) + @test Flux.outdims(m, (10, 10)) == (8, 8) + end end diff --git a/test/layers/conv.jl b/test/layers/conv.jl index b4136062..5701df80 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -107,3 +107,23 @@ end true end end + +@testset "conv output dimensions" begin + m = Conv((3, 3), 3 => 16) + @test Flux.outdims(m, (10, 10)) == (8, 8) + + m = ConvTranspose((3, 3), 3 => 16) + @test Flux.outdims(m, (8, 8)) == (10, 10) + + m = DepthwiseConv((3, 3), 3 => 6) + @test Flux.outdims(m, (10, 10)) == (8, 8) + + m = CrossCor((3, 3), 3 => 16) + @test Flux.outdims(m, (10, 10)) == (8, 8) + + m = MaxPool((2, 2)) + @test Flux.outdims(m, (10, 10)) == (5, 5) + + m = MeanPool((2, 2)) + @test Flux.outdims(m, (10, 10)) == (5, 5) +end \ No newline at end of file From a64378b11272444f8803ec0155262d47ab0cef71 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Sat, 7 Dec 2019 13:21:26 -0600 Subject: [PATCH 04/23] Switched to using NNlib for conv.jl outdims. --- src/layers/basic.jl | 20 ------------- src/layers/conv.jl | 73 ++++++++++----------------------------------- 2 files changed, 15 insertions(+), 78 deletions(-) diff --git a/src/layers/basic.jl b/src/layers/basic.jl index b62d8bb9..6f056429 100644 --- a/src/layers/basic.jl +++ b/src/layers/basic.jl @@ -169,16 +169,6 @@ function Base.show(io::IO, l::Diagonal) print(io, "Diagonal(", length(l.α), ")") end -""" - outdims(l::Diagonal, isize) - -Calculate the output dimensions given the input dimensions, `isize`. - -```julia -m = Diagonal(10) -outdims(m, (10,)) == (10,) -``` -""" outdims(l::Diagonal, isize) = (length(l.α),) """ @@ -228,16 +218,6 @@ function (mo::Maxout)(input::AbstractArray) mapreduce(f -> f(input), (acc, out) -> max.(acc, out), mo.over) end -""" - outdims(c::Maxout, isize) - -Calculate the output dimensions given the input dimensions, `isize`. 
- -```julia -m = Maxout(() -> Conv((3, 3), 3 => 16), 2) -outdims(m, (10, 10)) == (8, 8) -``` -""" outdims(l::Maxout, isize) = outdims(first(l.over), isize) """ diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 6ce9bcbf..7b32f999 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -1,6 +1,8 @@ -using NNlib: conv, ∇conv_data, depthwiseconv +using NNlib: conv, ∇conv_data, depthwiseconv, output_size + +# pad dims of x with dims of y until ndims(x) == ndims(y) +_paddims(x::Tuple, y::Tuple) = (x..., y[(end - (length(y) - length(x) - 1)):end]...) -_convoutdims(isize, ksize, ssize, pad) = Int.(floor.((isize .- ksize .+ 2 .* pad) ./ ssize .+ 1)) _convtransoutdims(isize, ksize, ssize, pad) = Int.(ssize .* (isize .- 1) .+ ksize .- 2 .* pad) expand(N, i::Tuple) = i @@ -75,13 +77,16 @@ end outdims(l::Conv, isize::Tuple) Calculate the output dimensions given the input dimensions, `isize`. +Batch size and channel size are ignored as per `NNlib.jl`. ```julia m = Conv((3, 3), 3 => 16) outdims(m, (10, 10)) == (8, 8) +outdims(m, (10, 10, 1, 3)) == (8, 8) ``` """ -outdims(l::Conv{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) +outdims(l::Conv, isize) = + output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation)) """ ConvTranspose(size, in=>out) @@ -156,17 +161,7 @@ end (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) -""" - outdims(l::ConvTranspose, isize::Tuple) - -Calculate the output dimensions given the input dimensions, `isize`. - -```julia -m = ConvTranspose((3, 3), 3 => 16) -outdims(m, (8, 8)) == (10, 10) -``` -""" -outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) +outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize[1:2], size(l.weight)[1:N], l.stride, l.pad[1:N]) """ DepthwiseConv(size, in=>out) @@ -232,17 +227,8 @@ end (a::DepthwiseConv{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) -""" - outdims(l::DepthwiseConv, isize::Tuple) - -Calculate the output dimensions given the input dimensions, `isize`. - -```julia -m = DepthwiseConv((3, 3), 3 => 6) -outdims(m, (10, 10)) == (8, 8) -``` -""" -outdims(l::DepthwiseConv{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) +outdims(l::DepthwiseConv, isize) = + output_size(DepthwiseConvDims(_paddims(isize, (1, 1, size(l.weight)[end], 1)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation)) """ CrossCor(size, in=>out) @@ -315,17 +301,8 @@ end (a::CrossCor{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) -""" - outdims(l::CrossCor, isize::Tuple) - -Calculate the output dimensions given the input dimensions, `isize`. 
- -```julia -m = CrossCor((3, 3), 3 => 16) -outdims(m, (10, 10)) == (8, 8) -``` -""" -outdims(l::CrossCor{N}, isize) where N = _convoutdims(isize, size(l.weight)[1:N], l.stride, l.pad[1:N]) +outdims(l::CrossCor, isize) = + output_size(DenseConvDims(_paddims(isize, size(l.weight)), size(l.weight); stride = l.stride, padding = l.pad, dilation = l.dilation)) """ MaxPool(k) @@ -356,17 +333,7 @@ function Base.show(io::IO, m::MaxPool) print(io, "MaxPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end -""" - outdims(l::MaxPool, isize::Tuple) - -Calculate the output dimensions given the input dimensions, `isize`. - -```julia -m = MaxPool((2, 2)) -outdims(m, (10, 10)) == (5, 5) -``` -""" -outdims(l::MaxPool{N}, isize) where N = _convoutdims(isize, l.k, l.stride, l.pad[1:N]) +outdims(l::MaxPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad)) """ MeanPool(k) @@ -396,14 +363,4 @@ function Base.show(io::IO, m::MeanPool) print(io, "MeanPool(", m.k, ", pad = ", m.pad, ", stride = ", m.stride, ")") end -""" - outdims(l::MeanPool, isize::Tuple) - -Calculate the output dimensions given the input dimensions, `isize`. - -```julia -m = MeanPool((2, 2)) -outdims(m, (10, 10)) == (5, 5) -``` -""" -outdims(l::MeanPool{N}, isize) where N = _convoutdims(isize, l.k, l.stride, l.pad[1:N]) \ No newline at end of file +outdims(l::MeanPool{N}, isize) where N = output_size(PoolDims(_paddims(isize, (l.k..., 1, 1)), l.k; stride = l.stride, padding = l.pad)) \ No newline at end of file From 0cdd11c0dc8e8e82a90467cc66e3b8330ad57682 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Sat, 7 Dec 2019 14:05:50 -0600 Subject: [PATCH 05/23] Added tests for varying padding, stride, and dilation with outdims. --- src/layers/conv.jl | 4 ++-- test/layers/conv.jl | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index 7b32f999..03de438a 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -3,7 +3,7 @@ using NNlib: conv, ∇conv_data, depthwiseconv, output_size # pad dims of x with dims of y until ndims(x) == ndims(y) _paddims(x::Tuple, y::Tuple) = (x..., y[(end - (length(y) - length(x) - 1)):end]...) 
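+# For illustration: _paddims extends x with the trailing dims of y, e.g.
+# _paddims((10, 10), (3, 3, 3, 16)) == (10, 10, 3, 16)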
-_convtransoutdims(isize, ksize, ssize, pad) = Int.(ssize .* (isize .- 1) .+ ksize .- 2 .* pad) +_convtransoutdims(isize, ksize, ssize, dsize, pad) = (isize .- 1).*ssize .+ 1 .+ (ksize .- 1).*dsize .- (pad[1:2:end] .+ pad[2:2:end]) expand(N, i::Tuple) = i expand(N, i::Integer) = ntuple(_ -> i, N) @@ -161,7 +161,7 @@ end (a::ConvTranspose{<:Any,<:Any,W})(x::AbstractArray{<:Real}) where {T <: Union{Float32,Float64}, W <: AbstractArray{T}} = a(T.(x)) -outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize[1:2], size(l.weight)[1:N], l.stride, l.pad[1:N]) +outdims(l::ConvTranspose{N}, isize) where N = _convtransoutdims(isize[1:2], size(l.weight)[1:N], l.stride, l.dilation, l.pad) """ DepthwiseConv(size, in=>out) diff --git a/test/layers/conv.jl b/test/layers/conv.jl index 5701df80..1a22b385 100644 --- a/test/layers/conv.jl +++ b/test/layers/conv.jl @@ -111,19 +111,51 @@ end @testset "conv output dimensions" begin m = Conv((3, 3), 3 => 16) @test Flux.outdims(m, (10, 10)) == (8, 8) + m = Conv((3, 3), 3 => 16; stride = 2) + @test Flux.outdims(m, (5, 5)) == (2, 2) + m = Conv((3, 3), 3 => 16; stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) + m = Conv((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2) + @test Flux.outdims(m, (5, 5)) == (4, 4) m = ConvTranspose((3, 3), 3 => 16) @test Flux.outdims(m, (8, 8)) == (10, 10) + m = ConvTranspose((3, 3), 3 => 16; stride = 2) + @test Flux.outdims(m, (2, 2)) == (5, 5) + m = ConvTranspose((3, 3), 3 => 16; stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) + m = ConvTranspose((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2) + @test Flux.outdims(m, (4, 4)) == (5, 5) m = DepthwiseConv((3, 3), 3 => 6) @test Flux.outdims(m, (10, 10)) == (8, 8) + m = DepthwiseConv((3, 3), 3 => 6; stride = 2) + @test Flux.outdims(m, (5, 5)) == (2, 2) + m = DepthwiseConv((3, 3), 3 => 6; stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) + m = DepthwiseConv((3, 3), 3 => 6; stride = 2, pad = 3, dilation = 2) + @test Flux.outdims(m, (5, 5)) == (4, 4) m = CrossCor((3, 3), 3 => 16) @test Flux.outdims(m, (10, 10)) == (8, 8) + m = CrossCor((3, 3), 3 => 16; stride = 2) + @test Flux.outdims(m, (5, 5)) == (2, 2) + m = CrossCor((3, 3), 3 => 16; stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) + m = CrossCor((3, 3), 3 => 16; stride = 2, pad = 3, dilation = 2) + @test Flux.outdims(m, (5, 5)) == (4, 4) m = MaxPool((2, 2)) @test Flux.outdims(m, (10, 10)) == (5, 5) + m = MaxPool((2, 2); stride = 1) + @test Flux.outdims(m, (5, 5)) == (4, 4) + m = MaxPool((2, 2); stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) m = MeanPool((2, 2)) @test Flux.outdims(m, (10, 10)) == (5, 5) + m = MeanPool((2, 2); stride = 1) + @test Flux.outdims(m, (5, 5)) == (4, 4) + m = MeanPool((2, 2); stride = 2, pad = 3) + @test Flux.outdims(m, (5, 5)) == (5, 5) end \ No newline at end of file From 04991d3261f006f134beb6333f504ad27e11a706 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Sat, 7 Dec 2019 14:06:11 -0600 Subject: [PATCH 06/23] Added entry to docs for outdims --- docs/src/models/basics.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index d83fc462..c6dc4e19 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -219,3 +219,13 @@ Flux.@functor Affine ``` This enables a useful extra set of functionality for our `Affine` layer, such as [collecting its parameters](../training/optimisers.md) or [moving it to the GPU](../gpu.md). 
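+
+For example (a sketch, reusing the `Affine` layer defined earlier on this page):
+
+```julia
+a = Affine(10, 5)
+length(Flux.params(a)) == 2  # W and b are now collected
+```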
+ +## Utility functions + +Flux provides some utility functions to help you generate models in an automated fashion. + +`outdims` enables you to calculate the spatial output dimensions of layers like `Conv` when applied to input images of a given size. + +```@docs +outdims +``` From 2f854bdfc0d7064f4e28988d6418d9b09324c11e Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 10 Dec 2019 09:57:08 -0600 Subject: [PATCH 07/23] Recommitting to trigger new build From 9803826a368fa3f527e9c2682876f168e11f75fc Mon Sep 17 00:00:00 2001 From: Chris Rackauckas Date: Mon, 20 Jan 2020 13:53:28 -0500 Subject: [PATCH 08/23] test restructure on the GPU Requires https://github.com/FluxML/Zygote.jl/pull/474 --- test/cuda/cuda.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/cuda/cuda.jl b/test/cuda/cuda.jl index 1576d88f..911eef93 100644 --- a/test/cuda/cuda.jl +++ b/test/cuda/cuda.jl @@ -58,6 +58,13 @@ end @test y[3,:] isa CuArray end +@testset "restructure gpu" begin + dudt = Dense(1,1) |> gpu + p,re = Flux.destructure(dudt) + foo(x) = sum(re(p)(x)) + @test gradient(foo, cu(rand(1)))[1] isa CuArray +end + if CuArrays.has_cudnn() @info "Testing Flux/CUDNN" include("cudnn.jl") From 197a1a70c09deba9f4d5ae1bf74bc12a86314288 Mon Sep 17 00:00:00 2001 From: pranjaldatta Date: Fri, 7 Feb 2020 03:47:19 +0530 Subject: [PATCH 09/23] added BostonHousing dataset and testing --- src/data/Data.jl | 3 + src/data/housing.jl | 136 ++++++++++++++++++++++++++++++++++++++++++++ test/data.jl | 8 ++- 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 src/data/housing.jl diff --git a/src/data/Data.jl b/src/data/Data.jl index d7cd0303..88af9549 100644 --- a/src/data/Data.jl +++ b/src/data/Data.jl @@ -42,4 +42,7 @@ using .Sentiment include("iris.jl") export Iris +include("housing.jl") +export Housing + end diff --git a/src/data/housing.jl b/src/data/housing.jl new file mode 100644 index 00000000..0d167dc0 --- /dev/null +++ b/src/data/housing.jl @@ -0,0 +1,136 @@ +""" +1. Title: Boston Housing Data + +2. Sources: + (a) Origin: This dataset was taken from the StatLib library which is + maintained at Carnegie Mellon University. + (b) Creator: Harrison, D. and Rubinfeld, D.L. 'Hedonic prices and the + demand for clean air', J. Environ. Economics & Management, + vol.5, 81-102, 1978. + (c) Date: July 7, 1993 + +3. Number of Instances: 506 + +4. Number of Attributes: 13 continuous attributes (including "class" + attribute "MEDV"), 1 binary-valued attribute. + +5. Attribute Information: + + 1. CRIM per capita crime rate by town + 2. ZN proportion of residential land zoned for lots over + 25,000 sq.ft. + 3. INDUS proportion of non-retail business acres per town + 4. CHAS Charles River dummy variable (= 1 if tract bounds + river; 0 otherwise) + 5. NOX nitric oxides concentration (parts per 10 million) + 6. RM average number of rooms per dwelling + 7. AGE proportion of owner-occupied units built prior to 1940 + 8. DIS weighted distances to five Boston employment centres + 9. RAD index of accessibility to radial highways + 10. TAX full-value property-tax rate per 10,000 dollars + 11. PTRATIO pupil-teacher ratio by town + 12. B 1000(Bk - 0.63)^2 where Bk is the proportion of blacks + by town + 13. LSTAT % lower status of the population + 14. 
MEDV Median value of owner-occupied homes in 1000's of dollars + + Downloaded From: https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data + +""" +module Housing + +using DelimitedFiles +using ..Data: deps, download_and_verify + +#Uncomment if package exists +#const cache_prefix = "https://cache.julialang.org/" +const cache_prefix = "" + +function load() + isfile(deps("housing.data")) && return + + @info "Downloading the Boston housing Dataset" + download_and_verify("$(cache_prefix)https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", + deps("housing.data"), + "baadf72995725d76efe787b664e1f083388c79ba21ef9a7990d87f774184735a") + + #@info "Download complete. Working on the files" + path = deps() + isfile(deps("housing.data")) && touch(joinpath(path, "tempfile.data")) + open(joinpath(path, "tempfile.data"), "a") do fout + open(deps("housing.data"), "r") do fin + for line in eachline(fin) + line = replace(lstrip(line), r" +" => s",") + println(fout, line) + end + end + end + mv(joinpath(path, "tempfile.data"), deps("housing.data"), force=true) +end + +""" +Gets the targets for the Boston housing dataset, a 506 element array listing the targets for each example + +```jldoctest +julia> using Flux + +julia> target = Flux.Data.Housing.targets() + +julia> summary(target) +506×1 Array{Float64,2} + +julia> target[1] +24.0 + +""" +function targets() + load() + housing = readdlm(deps("housing.data"), ',') + reshape(Vector{Float64}(housing[1:end,end]), (506, 1)) +end + + +""" +Gets the names of the features provided in the dataset + +""" +function feature_names() + ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"] +end + + +""" +Gets the features of the Boston Housing Dataset. This is a 506x13 Matrix of Float64 datatypes. +The values are in the order ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat"]. +It has 506 examples. 
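+Pair it with `targets()` for supervised training (a sketch):
+
+```julia
+x, y = Flux.Data.Housing.features(), Flux.Data.Housing.targets()
+size(x) == (506, 13) && size(y) == (506, 1)
+```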
+ +```jldoctest +julia> using Flux + +julia> features = Flux.Data.Housing.features() + +julia> summary(features) +506×13 Array{Float64,2} + +julia> features[1, :] +13-element Array{Float64,1}: +0.00632 +18.0 +2.31 +0.0 +0.538 + ⋮ +296.0 +15.3 +396.9 +4.98 + +""" +function features() + load() + housing = readdlm(deps("housing.data"), ',') + Matrix{Float64}(housing[1:end, 1:13]) +end + + +end \ No newline at end of file diff --git a/test/data.jl b/test/data.jl index 6b777873..aa913806 100644 --- a/test/data.jl +++ b/test/data.jl @@ -16,7 +16,13 @@ using Test @test Data.Sentiment.train() isa Vector{Data.Tree{Any}} @test Iris.features() isa Matrix -@test size(Iris.features()) == (4,150) +@test size(Iris.features()) == (4,150) @test Iris.labels() isa Vector{String} @test size(Iris.labels()) == (150,) + +@test Housing.features() isa Matrix +@test size(Housing.features()) == (506, 13) + +@test Housing.targets() isa Array{Float64} +@test size(Housing.targets()) == (506, 1) \ No newline at end of file From c37fc3cfa63a82deec33d40f837b880341440c7a Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Sun, 9 Feb 2020 19:45:04 -0600 Subject: [PATCH 10/23] Recommitting to trigger build From f5b9cf659cb14f0b05ab98b2fef70f705adfc8c3 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Thu, 20 Feb 2020 23:38:56 -0600 Subject: [PATCH 11/23] Updated docs to specify exactly what layers support outdims --- docs/src/models/basics.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md index c6dc4e19..6e8d0b76 100644 --- a/docs/src/models/basics.md +++ b/docs/src/models/basics.md @@ -225,6 +225,17 @@ This enables a useful extra set of functionality for our `Affine` layer, such as Flux provides some utility functions to help you generate models in an automated fashion. `outdims` enables you to calculate the spatial output dimensions of layers like `Conv` when applied to input images of a given size. +Currently limited to the following layers: +- `Chain` +- `Dense` +- `Conv` +- `Diagonal` +- `Maxout` +- `ConvTranspose` +- `DepthwiseConv` +- `CrossCor` +- `MaxPool` +- `MeanPool` ```@docs outdims From 6ced7e1ecff379cf3df3f62f05557317dc56e41f Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sun, 23 Feb 2020 13:42:11 -0500 Subject: [PATCH 12/23] expand Colors compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f76063bd..71282a10 100644 --- a/Project.toml +++ b/Project.toml @@ -27,7 +27,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" AbstractTrees = "0.2, 0.3" Adapt = "1" CodecZlib = "0.5, 0.6" -Colors = "0.8, 0.9" +Colors = "0.8, 0.9, 0.10, 0.11" CuArrays = "1.6" Juno = "0.5, 0.6, 0.7, 0.8" MacroTools = "0.3, 0.4, 0.5" From db4eaf254b5de8902349afbd705243c22d0ec91a Mon Sep 17 00:00:00 2001 From: Bulat Suleymanov Date: Mon, 24 Feb 2020 13:16:51 +0500 Subject: [PATCH 13/23] Edit description of convolutional layer --- src/layers/conv.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/conv.jl b/src/layers/conv.jl index f4de3ffc..829051ae 100644 --- a/src/layers/conv.jl +++ b/src/layers/conv.jl @@ -17,7 +17,7 @@ Example: Applying Conv layer to a 1-channel input using a 2x2 window size, out = 16 Conv((2, 2), 1=>16, relu) -Data should be stored in WHCN order (width, height, # channels, # batches). +Data should be stored in WHCN order (width, height, # channels, batch size). 
In other words, a 100×100 RGB image would be a `100×100×3×1` array, and a batch of 50 would be a `100×100×3×50` array. From 569021a9f1f9910f7f2e9ac6869bb149b9da7023 Mon Sep 17 00:00:00 2001 From: pranjaldatta Date: Wed, 26 Feb 2020 15:05:23 +0530 Subject: [PATCH 14/23] added newlines at end of file --- src/data/housing.jl | 2 +- test/data.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/housing.jl b/src/data/housing.jl index 0d167dc0..61391304 100644 --- a/src/data/housing.jl +++ b/src/data/housing.jl @@ -133,4 +133,4 @@ function features() end -end \ No newline at end of file +end diff --git a/test/data.jl b/test/data.jl index aa913806..6c012a93 100644 --- a/test/data.jl +++ b/test/data.jl @@ -25,4 +25,4 @@ using Test @test size(Housing.features()) == (506, 13) @test Housing.targets() isa Array{Float64} -@test size(Housing.targets()) == (506, 1) \ No newline at end of file +@test size(Housing.targets()) == (506, 1) From 759fe9df2fb0a4665052383fae1b0fd8978a2f52 Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Wed, 26 Feb 2020 20:27:39 +0100 Subject: [PATCH 15/23] update docs and export update! --- docs/src/training/optimisers.md | 3 ++- src/optimise/Optimise.jl | 2 +- src/optimise/train.jl | 17 +++++++++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/src/training/optimisers.md b/docs/src/training/optimisers.md index 5e8b95de..37288b5d 100644 --- a/docs/src/training/optimisers.md +++ b/docs/src/training/optimisers.md @@ -21,7 +21,7 @@ grads = gradient(() -> loss(x, y), θ) We want to update each parameter, using the gradient, in order to improve (reduce) the loss. Here's one way to do that: ```julia -using Flux: update! +using Flux.Optimise: update! η = 0.1 # Learning Rate for p in (W, b) @@ -46,6 +46,7 @@ An optimiser `update!` accepts a parameter and a gradient, and updates the param All optimisers return an object that, when passed to `train!`, will update the parameters passed to it. ```@docs +Flux.Optimise.update! Descent Momentum Nesterov diff --git a/src/optimise/Optimise.jl b/src/optimise/Optimise.jl index 68c18a6f..28a1849d 100644 --- a/src/optimise/Optimise.jl +++ b/src/optimise/Optimise.jl @@ -1,6 +1,6 @@ module Optimise -export train!, +export train!, update!, SGD, Descent, ADAM, Momentum, Nesterov, RMSProp, ADAGrad, AdaMax, ADADelta, AMSGrad, NADAM, ADAMW,RADAM, InvDecay, ExpDecay, WeightDecay, stop, Optimiser diff --git a/src/optimise/train.jl b/src/optimise/train.jl index ae0f334c..59404a42 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -1,9 +1,22 @@ using Juno import Zygote: Params, gradient + +""" + update!(opt, p, g) + update!(opt, ps::Params, gs) + +Perform an update step of the parameters `ps` (or the single parameter `p`) +according to optimizer `opt` and the gradients `gs` (the gradient `g`). + +As a result, the parameters are mutated and the optimizer's internal state may change. + + update!(x, x̄) + +Update the array `x` according to `x .-= x̄`. 
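+
+For example (a sketch; assumes a `model`, data `x`, `y` and a `loss` function):
+
+```julia
+opt = Descent(0.1)
+θ = params(model)
+gs = gradient(() -> loss(x, y), θ)
+update!(opt, θ, gs)
+```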
+""" function update!(x::AbstractArray, x̄) - x .+= x̄ - return x + x .-= x̄ end function update!(opt, x, x̄) From a121742f9c766b954f56a46e631333853e97d5ad Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 27 Feb 2020 13:56:05 +0530 Subject: [PATCH 16/23] pkg up --- Manifest.toml | 68 ++++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 12986ccd..55f3e229 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -40,15 +40,15 @@ version = "2.1.0" [[CUDAdrv]] deps = ["CEnum", "CUDAapi", "Printf"] -git-tree-sha1 = "1fce616fa0806c67c133eb1d2f68f0f1a7504665" +git-tree-sha1 = "5660775f2a3214420add960e1ff2baf46d5297cd" uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde" -version = "5.0.1" +version = "5.1.0" [[CUDAnative]] deps = ["Adapt", "CEnum", "CUDAapi", "CUDAdrv", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Printf", "TimerOutputs"] -git-tree-sha1 = "6e11d5c2c91fc623952e94c4fb73f9c4db74795a" +git-tree-sha1 = "e0c2805c9a7d338823c0d8f574242e284410fa61" uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17" -version = "2.7.0" +version = "2.9.1" [[CodecZlib]] deps = ["BinaryProvider", "Libdl", "TranscodingStreams"] @@ -74,6 +74,12 @@ git-tree-sha1 = "efdaf19ab11c7889334ca247ff4c9f7c322817b0" uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" version = "0.2.0" +[[CompilerSupportLibraries_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "b57c5d019367c90f234a7bc7e24ff0a84971da5d" +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.2.0+1" + [[CuArrays]] deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Printf", "Random", "Requires", "SparseArrays", "TimerOutputs"] git-tree-sha1 = "51fbe053dea29ed2513e02d38380007310cf4c4b" @@ -87,9 +93,9 @@ version = "1.1.0" [[DataStructures]] deps = ["InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "f784254f428fb8fd7ac15982e5862a38a44523d3" +git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.17.7" +version = "0.17.10" [[Dates]] deps = ["Printf"] @@ -107,9 +113,9 @@ version = "1.0.2" [[DiffRules]] deps = ["NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "10dca52cf6d4a62d82528262921daf63b99704a2" +git-tree-sha1 = "eb0c34204c8410888844ada5359ac8b96292cfd1" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.0.0" +version = "1.0.1" [[Distributed]] deps = ["Random", "Serialization", "Sockets"] @@ -123,15 +129,15 @@ version = "1.2.0" [[FFTW_jll]] deps = ["Libdl", "Pkg"] -git-tree-sha1 = "05674f209a6e3387dd103a945b0113eeb64b1a58" +git-tree-sha1 = "ddb57f4cf125243b4aa4908c94d73a805f3cbf2c" uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" -version = "3.3.9+3" +version = "3.3.9+4" [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "fec413d4fc547992eb62a5c544cedb6d7853c1f5" +git-tree-sha1 = "85c6b57e2680fa28d5c8adc798967377646fbf66" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.8.4" +version = "0.8.5" [[FixedPointNumbers]] git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b" @@ -140,9 +146,9 @@ version = "0.6.1" [[ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "840700059391d36e2498d89c2e82c08f261f2a2a" +git-tree-sha1 = "88b082d492be6b63f967b6c96b352e25ced1a34c" uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.8" +version 
= "0.10.9" [[GPUArrays]] deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"] @@ -152,9 +158,9 @@ version = "2.0.1" [[IRTools]] deps = ["InteractiveUtils", "MacroTools", "Test"] -git-tree-sha1 = "72421971e60917b8cd7737f9577c4f0f87eab306" +git-tree-sha1 = "1a4355e4b5b50be2311ebb644f34f3306dbd0410" uuid = "7869d1d1-7146-5819-86e3-90919afe41df" -version = "0.3.0" +version = "0.3.1" [[IntelOpenMP_jll]] deps = ["Libdl", "Pkg"] @@ -192,10 +198,10 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[MKL_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "61069ae718b8ab1e325bbfb4e5268902e7ea08e3" +deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] +git-tree-sha1 = "720629cc8cbd12c146ca01b661fd1a6cf66e2ff4" uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2019.0.117+0" +version = "2019.0.117+2" [[MacroTools]] deps = ["DataStructures", "Markdown", "Random"] @@ -234,10 +240,10 @@ uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "0.3.3" [[OpenSpecFun_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "65f672edebf3f4e613ddf37db9dcbd7a407e5e90" +deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] +git-tree-sha1 = "d110040968b9afe95c6bd9c6233570b0fe8abd22" uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.3+1" +version = "0.5.3+2" [[OrderedCollections]] deps = ["Random", "Serialization", "Test"] @@ -273,9 +279,9 @@ version = "0.2.0" [[Requires]] deps = ["UUIDs"] -git-tree-sha1 = "999513b7dea8ac17359ed50ae8ea089e4464e35e" +git-tree-sha1 = "d37400976e98018ee840e0ca4f9d20baa231dc6b" uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.0.0" +version = "1.0.1" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -298,9 +304,9 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] deps = ["OpenSpecFun_jll"] -git-tree-sha1 = "268052ee908b2c086cc0011f528694f02f3e2408" +git-tree-sha1 = "e19b98acb182567bcb7b75bb5d9eedf3a3b5ec6c" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.9.0" +version = "0.10.0" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] @@ -349,15 +355,17 @@ version = "0.9.0" [[Zlib_jll]] deps = ["Libdl", "Pkg"] -git-tree-sha1 = "5618a43055eb09377edca21d19d0e99bce24a9c3" +git-tree-sha1 = "fd36a6739e256527287c5444960d0266712cd49e" uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.11+7" +version = "1.2.11+8" [[Zygote]] deps = ["DiffRules", "FFTW", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "74382bcc4c1e8075e14554da67d75565f8fb7827" +git-tree-sha1 = "ab2683e7670925ed73b7f076b26847683e38db8c" +repo-rev = "master" +repo-url = "https://github.com/FluxML/Zygote.jl.git" uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.4.5" +version = "0.4.7" [[ZygoteRules]] deps = ["MacroTools"] From 35f6998be7572bb557948d3cee65797be22c9019 Mon Sep 17 00:00:00 2001 From: Dhairya Gandhi Date: Thu, 27 Feb 2020 22:19:06 +0530 Subject: [PATCH 17/23] pkg up --- Manifest.toml | 60 +++++++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 55f3e229..693f7ca2 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -8,15 +8,15 @@ version = "0.5.0" [[AbstractTrees]] deps = ["Markdown"] -git-tree-sha1 = "8201f932428d25a2e2903300764515754847d87d" +git-tree-sha1 = "86d092c2599f1f7bb01668bf8eb3412f98d61e47" uuid = 
"1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.0" +version = "0.3.2" [[Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "82dab828020b872fa9efd3abec1152b075bc7cbf" +git-tree-sha1 = "c88cfc7f9c1f9f8633cddf0b56e86302b70f64c5" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "1.0.0" +version = "1.0.1" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -34,21 +34,21 @@ version = "0.2.0" [[CUDAapi]] deps = ["Libdl", "Logging"] -git-tree-sha1 = "56a813440ac98a1aa64672ab460a1512552211a7" +git-tree-sha1 = "d7ceadd8f821177d05b897c0517e94633db535fe" uuid = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3" -version = "2.1.0" +version = "3.1.0" [[CUDAdrv]] deps = ["CEnum", "CUDAapi", "Printf"] -git-tree-sha1 = "5660775f2a3214420add960e1ff2baf46d5297cd" +git-tree-sha1 = "01e90fa34e25776bc7c8661183d4519149ebfe59" uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde" -version = "5.1.0" +version = "6.0.0" [[CUDAnative]] deps = ["Adapt", "CEnum", "CUDAapi", "CUDAdrv", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Printf", "TimerOutputs"] -git-tree-sha1 = "e0c2805c9a7d338823c0d8f574242e284410fa61" +git-tree-sha1 = "f86269ff60ebe082a2806ecbce51f3cadc68afe9" uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17" -version = "2.9.1" +version = "2.10.2" [[CodecZlib]] deps = ["BinaryProvider", "Libdl", "TranscodingStreams"] @@ -58,15 +58,15 @@ version = "0.6.0" [[ColorTypes]] deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "7b62b728a5f3dd6ee3b23910303ccf27e82fad5e" +git-tree-sha1 = "b9de8dc6106e09c79f3f776c27c62360d30e5eb8" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.8.1" +version = "0.9.1" [[Colors]] deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Printf", "Reexport"] -git-tree-sha1 = "c9c1845d6bf22e34738bee65c357a69f416ed5d1" +git-tree-sha1 = "177d8b959d3c103a6d57574c38ee79c81059c31b" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" -version = "0.9.6" +version = "0.11.2" [[CommonSubexpressions]] deps = ["Test"] @@ -82,9 +82,9 @@ version = "0.2.0+1" [[CuArrays]] deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Printf", "Random", "Requires", "SparseArrays", "TimerOutputs"] -git-tree-sha1 = "51fbe053dea29ed2513e02d38380007310cf4c4b" +git-tree-sha1 = "7c20c5a45bb245cf248f454d26966ea70255b271" uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae" -version = "1.6.0" +version = "1.7.2" [[DataAPI]] git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252" @@ -140,9 +140,9 @@ uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" version = "0.8.5" [[FixedPointNumbers]] -git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b" +git-tree-sha1 = "4aaea64dd0c30ad79037084f8ca2b94348e65eaa" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.6.1" +version = "0.7.1" [[ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"] @@ -173,10 +173,10 @@ deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[Juno]] -deps = ["Base64", "Logging", "Media", "Profile", "Test"] -git-tree-sha1 = "30d94657a422d09cb97b6f86f04f750fa9c50df8" +deps = ["Base64", "Logging", "Media", "Profile"] +git-tree-sha1 = "4f2249fb58cfb140eeb89428e31791e2f8959d8c" uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" -version = "0.7.2" +version = "0.8.0" [[LLVM]] deps = ["CEnum", "Libdl", "Printf", "Unicode"] @@ -205,9 +205,9 @@ version = "2019.0.117+2" [[MacroTools]] deps = ["DataStructures", "Markdown", "Random"] -git-tree-sha1 = 
"e2fc7a55bb2224e203bbd8b59f72b91323233458" +git-tree-sha1 = "07ee65e03e28ca88bc9a338a3726ae0c3efaa94b" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.3" +version = "0.5.4" [[Markdown]] deps = ["Base64"] @@ -230,9 +230,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[NNlib]] deps = ["BinaryProvider", "Libdl", "LinearAlgebra", "Requires", "Statistics"] -git-tree-sha1 = "135c0de4794d5e214b06f1fb4787af4a72896e61" +git-tree-sha1 = "755c0bab3912ff782167e1b4b774b833f8a0e550" uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.6.2" +version = "0.6.4" [[NaNMath]] git-tree-sha1 = "928b8ca9b2791081dc71a51c55347c27c618760f" @@ -320,9 +320,9 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] -git-tree-sha1 = "c53e809e63fe5cf5de13632090bc3520649c9950" +git-tree-sha1 = "be5c7d45daa449d12868f4466dbf5882242cf2d9" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.32.0" +version = "0.32.1" [[Test]] deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] @@ -349,9 +349,9 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[ZipFile]] deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "5de8320a46812da1a8ca98b16a8a4546d44efa62" +git-tree-sha1 = "8748302cfdec02c4ae9c97b112cf10003f7f767f" uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.9.0" +version = "0.9.1" [[Zlib_jll]] deps = ["Libdl", "Pkg"] @@ -361,7 +361,7 @@ version = "1.2.11+8" [[Zygote]] deps = ["DiffRules", "FFTW", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "ab2683e7670925ed73b7f076b26847683e38db8c" +git-tree-sha1 = "3c65158c0aa0808cdfff8bca2a36430b038aad00" repo-rev = "master" repo-url = "https://github.com/FluxML/Zygote.jl.git" uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" From 425fcdbe6964d581b4d5f6eda1615e883a83b5bd Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Sat, 29 Feb 2020 11:14:48 +0100 Subject: [PATCH 18/23] NNlib docs + misc docs improvements --- docs/make.jl | 3 +- docs/src/gpu.md | 4 +- docs/src/models/layers.md | 30 ++++------- docs/src/models/nnlib.md | 37 +++++++++++++ docs/src/models/regularisation.md | 4 +- src/layers/normalise.jl | 16 ++++-- src/layers/stateless.jl | 87 ++++++++++++++++++------------- 7 files changed, 115 insertions(+), 66 deletions(-) create mode 100644 docs/src/models/nnlib.md diff --git a/docs/make.jl b/docs/make.jl index b950e959..fe3544fc 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -13,7 +13,8 @@ makedocs(modules=[Flux, NNlib], ["Basics" => "models/basics.md", "Recurrence" => "models/recurrence.md", "Regularisation" => "models/regularisation.md", - "Model Reference" => "models/layers.md"], + "Model Reference" => "models/layers.md", + "NNlib" => "models/nnlib.md"], "Training Models" => ["Optimisers" => "training/optimisers.md", "Training" => "training/training.md"], diff --git a/docs/src/gpu.md b/docs/src/gpu.md index bb13fdd1..19d0c8c6 100644 --- a/docs/src/gpu.md +++ b/docs/src/gpu.md @@ -30,7 +30,7 @@ If you define a structured model, like a `Dense` layer or `Chain`, you just need ```julia d = Dense(10, 5, σ) d = fmap(cu, d) -d.W # Tracked CuArray +d.W # CuArray d(cu(rand(10))) # CuArray output m = Chain(Dense(10, 5, σ), Dense(5, 2), softmax) @@ -53,7 +53,7 @@ julia> x = rand(10) |> gpu 0.511655 julia> m(x) -Tracked 5-element 
CuArray{Float32,1}: +5-element CuArray{Float32,1}: -0.30535 ⋮ -0.618002 diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md index 5f2ab3ce..41e98f32 100644 --- a/docs/src/models/layers.md +++ b/docs/src/models/layers.md @@ -40,19 +40,6 @@ Maxout SkipConnection ``` -## Activation Functions - -Non-linearities that go between layers of your model. Most of these functions are defined in [NNlib](https://github.com/FluxML/NNlib.jl) but are available by default in Flux. - -Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call `σ.(xs)`, `relu.(xs)` and so on. - -```@docs -σ -relu -leakyrelu -elu -swish -``` ## Normalisation & Regularisation @@ -61,6 +48,7 @@ These layers don't affect the structure of the network but may improve training ```@docs BatchNorm Dropout +Flux.dropout AlphaDropout LayerNorm GroupNorm @@ -68,12 +56,12 @@ GroupNorm ## Cost Functions ```@docs -mse -crossentropy -logitcrossentropy -binarycrossentropy -logitbinarycrossentropy -kldivergence -poisson -hinge +Flux.mse +Flux.crossentropy +Flux.logitcrossentropy +Flux.binarycrossentropy +Flux.logitbinarycrossentropy +Flux.kldivergence +Flux.poisson +Flux.hinge ``` diff --git a/docs/src/models/nnlib.md b/docs/src/models/nnlib.md new file mode 100644 index 00000000..f5732574 --- /dev/null +++ b/docs/src/models/nnlib.md @@ -0,0 +1,37 @@ +## NNlib +Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. + +## Activation Functions +Non-linearities that go between layers of your model. Note that, unless otherwise stated, activation functions operate on scalars. To apply them to an array you can call `σ.(xs)`, `relu.(xs)` and so on. + +```@docs +NNlib.elu +NNlib.gelu +NNlib.leakyrelu +NNlib.logcosh +NNlib.logsigmoid +NNlib.sigmoid +NNlib.relu +NNlib.selu +NNlib.softplus +NNlib.softsign +NNlib.swish +``` + +## Softmax +```@docs +NNlib.softmax +NNlib.logsoftmax +``` + +## Pooling +```@docs +NNlib.maxpool +NNlib.meanpool +``` + +## Convolution +```@docs +NNlib.conv +NNlib.depthwiseconv +``` \ No newline at end of file diff --git a/docs/src/models/regularisation.md b/docs/src/models/regularisation.md index e1d88d77..02aa3da8 100644 --- a/docs/src/models/regularisation.md +++ b/docs/src/models/regularisation.md @@ -31,7 +31,7 @@ julia> params(m) param([0.0, 0.0, 0.0, 0.0, 0.0]) julia> sum(norm, params(m)) -26.01749952921026 (tracked) +26.01749952921026 ``` Here's a larger example with a multi-layer perceptron. @@ -52,7 +52,7 @@ One can also easily add per-layer regularisation via the `activations` function: ```julia julia> using Flux: activations -julia> c = Chain(Dense(10,5,σ),Dense(5,2),softmax) +julia> c = Chain(Dense(10, 5, σ), Dense(5, 2), softmax) Chain(Dense(10, 5, σ), Dense(5, 2), softmax) julia> activations(c, rand(10)) diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl index b421d3e7..2268fdc0 100644 --- a/src/layers/normalise.jl +++ b/src/layers/normalise.jl @@ -7,6 +7,16 @@ _dropout_shape(s, dims) = tuple((i ∉ dims ? 1 : si for (i, si) ∈ enumerate(s _dropout_kernel(y::T, p, q) where {T} = y > p ? T(1 / q) : T(0) +""" + dropout(p, dims = :) + +Dropout function. For each input, either sets that input to `0` (with probability +`p`) or scales it by `1/(1-p)`. The `dims` argument is to specify the unbroadcasted +dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is +used as a regularisation, i.e. it reduces overfitting during training. + +See also [`Dropout`](@ref). 
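+
+Note that the forward definition below is the identity; the random mask is
+only applied inside the `@adjoint`, i.e. while differentiating (a sketch):
+
+```julia
+x = ones(Float32, 3, 4)
+dropout(x, 0.5) == x  # a no-op outside of a gradient context
+```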
+""" dropout(x, p; dims = :) = x @adjoint function dropout(x, p; dims = :) @@ -18,10 +28,7 @@ end """ Dropout(p, dims = :) -A Dropout layer. For each input, either sets that input to `0` (with probability -`p`) or scales it by `1/(1-p)`. The `dims` argument is to specified the unbroadcasted - dimensions, i.e. `dims=1` does dropout along columns and `dims=2` along rows. This is - used as a regularisation, i.e. it reduces overfitting during training. see also [`dropout`](@ref). +A Dropout layer. In the forward pass, applies the [`dropout`](@ref) function on the input. """ mutable struct Dropout{F,D} p::F @@ -43,6 +50,7 @@ end """ AlphaDropout(p) + A dropout layer. It is used in Self-Normalizing Neural Networks. (https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf) The AlphaDropout layer ensures that mean and variance of activations remains the same as before. diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl index 159a8385..5de5842b 100644 --- a/src/layers/stateless.jl +++ b/src/layers/stateless.jl @@ -1,10 +1,12 @@ -using CuArrays -using NNlib: logsoftmax, logσ - # Cost functions +""" + mse(ŷ, y) +Return the mean squared error `sum((ŷ .- y).^2) / length(y)`. +""" mse(ŷ, y) = sum((ŷ .- y).^2) * 1 // length(y) + function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Nothing) return -sum(y .* log.(ŷ)) * 1 // size(y, 2) end @@ -17,10 +19,26 @@ function _crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat, weight::Abstr return -sum(y .* log.(ŷ) .* weight) * 1 // size(y, 2) end +""" + crossentropy(ŷ, y; weight=1) + +Return the crossentropy computed as `-sum(y .* log.(ŷ) .* weight) / size(y, 2)`. + +See also [`logitcrossentropy`](@ref), [`binarycrossentropy`](@ref). +""" crossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight=nothing) = _crossentropy(ŷ, y, weight) -function logitcrossentropy(logŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1) - return -sum(y .* logsoftmax(logŷ) .* weight) * 1 // size(y, 2) +""" + logitcrossentropy(ŷ, y; weight=1) + +Return the crossentropy computed after a [softmax](@ref) operation: + + -sum(y .* logsoftmax(ŷ) .* weight) / size(y, 2) + +See also [`crossentropy`](@ref), [`binarycrossentropy`](@ref). +""" +function logitcrossentropy(ŷ::AbstractVecOrMat, y::AbstractVecOrMat; weight = 1) + return -sum(y .* logsoftmax(ŷ) .* weight) * 1 // size(y, 2) end """ @@ -28,11 +46,7 @@ end Return `-y*log(ŷ + ϵ) - (1-y)*log(1-ŷ + ϵ)`. The ϵ term provides numerical stability. - julia> binarycrossentropy.(σ.([-1.1491, 0.8619, 0.3127]), [1, 1, 0.]) - 3-element Array{Float64,1}: - 1.4244 - 0.352317 - 0.86167 +Typically, the prediction `ŷ` is given by the output of a [`sigmoid`](@ref) activation. """ binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) @@ -40,44 +54,42 @@ binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ CuArrays.@cufunc binarycrossentropy(ŷ, y; ϵ=eps(ŷ)) = -y*log(ŷ + ϵ) - (1 - y)*log(1 - ŷ + ϵ) """ - logitbinarycrossentropy(logŷ, y) + logitbinarycrossentropy(ŷ, y) -`logitbinarycrossentropy(logŷ, y)` is mathematically equivalent to `binarycrossentropy(σ(logŷ), y)` +`logitbinarycrossentropy(ŷ, y)` is mathematically equivalent to `binarycrossentropy(σ(ŷ), y)` but it is more numerically stable. - julia> logitbinarycrossentropy.([-1.1491, 0.8619, 0.3127], [1, 1, 0.]) - 3-element Array{Float64,1}: - 1.4244 - 0.352317 - 0.86167 +See also [`binarycrossentropy`](@ref), [`sigmoid`](@ref), [`logsigmoid`](@ref). 
""" -logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ) +logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ) # Re-definition to fix interaction with CuArrays. -CuArrays.@cufunc logitbinarycrossentropy(logŷ, y) = (1 - y)*logŷ - logσ(logŷ) +CuArrays.@cufunc logitbinarycrossentropy(ŷ, y) = (1 - y)*ŷ - logσ(ŷ) """ - normalise(x::AbstractArray; dims=1) + normalise(x; dims=1) Normalises `x` to mean 0 and standard deviation 1, across the dimensions given by `dims`. Defaults to normalising over columns. - julia> a = reshape(collect(1:9), 3, 3) - 3×3 Array{Int64,2}: - 1 4 7 - 2 5 8 - 3 6 9 +```julia-repl +julia> a = reshape(collect(1:9), 3, 3) +3×3 Array{Int64,2}: + 1 4 7 + 2 5 8 + 3 6 9 - julia> normalise(a) - 3×3 Array{Float64,2}: - -1.22474 -1.22474 -1.22474 - 0.0 0.0 0.0 - 1.22474 1.22474 1.22474 +julia> normalise(a) +3×3 Array{Float64,2}: + -1.22474 -1.22474 -1.22474 + 0.0 0.0 0.0 + 1.22474 1.22474 1.22474 - julia> normalise(a, dims=2) - 3×3 Array{Float64,2}: - -1.22474 0.0 1.22474 - -1.22474 0.0 1.22474 - -1.22474 0.0 1.22474 +julia> normalise(a, dims=2) +3×3 Array{Float64,2}: + -1.22474 0.0 1.22474 + -1.22474 0.0 1.22474 + -1.22474 0.0 1.22474 +``` """ function normalise(x::AbstractArray; dims=1) μ′ = mean(x, dims = dims) @@ -87,6 +99,7 @@ end """ kldivergence(ŷ, y) + KLDivergence is a measure of how much one probability distribution is different from the other. It is always non-negative and zero only when both the distributions are equal everywhere. [KL Divergence](https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence). @@ -99,6 +112,7 @@ end """ poisson(ŷ, y) + Poisson loss function is a measure of how the predicted distribution diverges from the expected distribution. [Poisson Loss](https://peltarion.com/knowledge-center/documentation/modeling-view/build-an-ai-model/loss-functions/poisson). """ @@ -106,7 +120,8 @@ poisson(ŷ, y) = sum(ŷ .- y .* log.(ŷ)) *1 // size(y,2) """ hinge(ŷ, y) -Measures the loss given the prediction ŷ and true labels y(containing 1 or -1). + +Measures the loss given the prediction `ŷ` and true labels `y` (containing 1 or -1). [Hinge Loss](https://en.wikipedia.org/wiki/Hinge_loss). """ hinge(ŷ, y) = sum(max.(0, 1 .- ŷ .* y)) *1 // size(y,2) From b6c79b38b4bf54aba0ee096b38afd1180ad1ee55 Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Wed, 26 Feb 2020 13:48:27 +0100 Subject: [PATCH 19/23] add DataLoader special case train! 
for the unsupervised data iterator --- Manifest.toml | 2 +- Project.toml | 5 +- docs/make.jl | 4 +- docs/src/data/dataloader.md | 6 +++ docs/src/training/training.md | 19 +++++-- src/Flux.jl | 1 + src/data/Data.jl | 10 ++++ src/data/dataloader.jl | 88 +++++++++++++++++++++++++++++++++ src/optimise/train.jl | 19 ++++--- test/data.jl | 93 ++++++++++++++++++++++++++++------- test/runtests.jl | 59 ++++++++++++++-------- 11 files changed, 253 insertions(+), 53 deletions(-) create mode 100644 docs/src/data/dataloader.md create mode 100644 src/data/dataloader.jl diff --git a/Manifest.toml b/Manifest.toml index 693f7ca2..788e5354 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -252,7 +252,7 @@ uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.1.0" [[Pkg]] -deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] diff --git a/Project.toml b/Project.toml index 71282a10..bd105730 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,10 @@ julia = "1" [extras] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + [targets] -test = ["Test", "Documenter"] +test = ["Test", "Documenter", "IterTools", "LinearAlgebra"] diff --git a/docs/make.jl b/docs/make.jl index fe3544fc..0d597500 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,10 +15,12 @@ makedocs(modules=[Flux, NNlib], "Regularisation" => "models/regularisation.md", "Model Reference" => "models/layers.md", "NNlib" => "models/nnlib.md"], + "Handling Data" => + ["One-Hot Encoding" => "data/onehot.md", + "DataLoader" => "data/dataloader.md"], "Training Models" => ["Optimisers" => "training/optimisers.md", "Training" => "training/training.md"], - "One-Hot Encoding" => "data/onehot.md", "GPU Support" => "gpu.md", "Saving & Loading" => "saving.md", "Performance Tips" => "performance.md", diff --git a/docs/src/data/dataloader.md b/docs/src/data/dataloader.md new file mode 100644 index 00000000..70a883c9 --- /dev/null +++ b/docs/src/data/dataloader.md @@ -0,0 +1,6 @@ +# DataLoader +Flux provides the `DataLoader` type in the `Flux.Data` module to handle iteration over mini-batches of data. + +```@docs +Flux.Data.DataLoader +``` \ No newline at end of file diff --git a/docs/src/training/training.md b/docs/src/training/training.md index b42db7c9..64b2b5e8 100644 --- a/docs/src/training/training.md +++ b/docs/src/training/training.md @@ -7,10 +7,10 @@ To actually train a model we need four things: * A collection of data points that will be provided to the objective function. * An [optimiser](optimisers.md) that will update the model parameters appropriately. -With these we can call `Flux.train!`: +With these we can call `train!`: -```julia -Flux.train!(objective, params, data, opt) +```@docs +Flux.Optimise.train! ``` There are plenty of examples in the [model zoo](https://github.com/FluxML/model-zoo). @@ -56,7 +56,8 @@ data = [(x, y)] ```julia data = [(x, y), (x, y), (x, y)] # Or equivalently -data = Iterators.repeated((x, y), 3) +using IterTools: ncycle +data = ncycle([(x, y)], 3) ``` It's common to load the `x`s and `y`s separately. 
 In this case you can use `zip`:
@@ -67,6 +68,14 @@ ys = [rand( 10), rand( 10), rand( 10)]
 data = zip(xs, ys)
 ```
 
+Training data can be conveniently partitioned for mini-batch training using the [`Flux.Data.DataLoader`](@ref) type:
+
+```julia
+X = rand(28, 28, 60000)
+Y = rand(0:9, 60000)
+data = DataLoader(X, Y, batchsize=128)
+```
+
 Note that, by default, `train!` only loops over the data once (a single "epoch").
 A convenient way to run multiple epochs from the REPL is provided by `@epochs`.
 
@@ -120,7 +129,7 @@ An example follows that works similar to the default `Flux.train` but with no ca
 You don't need callbacks if you just code the calls to your functions directly into the loop.
 E.g. in the places marked with comments.
 
-```
+```julia
 function my_custom_train!(loss, ps, data, opt)
   ps = Params(ps)
   for d in data
diff --git a/src/Flux.jl b/src/Flux.jl
index 9969b323..c99e41a1 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -7,6 +7,7 @@ using Zygote, MacroTools, Juno, Reexport, Statistics, Random
 using MacroTools: @forward
 @reexport using NNlib
 using Zygote: Params, @adjoint, gradient, pullback, @nograd
+
 export gradient
 
 export Chain, Dense, Maxout, RNN, LSTM, GRU, Conv, CrossCor, ConvTranspose, MaxPool, MeanPool,
diff --git a/src/data/Data.jl b/src/data/Data.jl
index 88af9549..940b7ea7 100644
--- a/src/data/Data.jl
+++ b/src/data/Data.jl
@@ -3,6 +3,9 @@ module Data
 import ..Flux
 import SHA
 
+using Random: shuffle!
+using Base: @propagate_inbounds
+
 export CMUDict, cmudict
 
 deps(path...) = joinpath(@__DIR__, "..", "..", "deps", path...)
@@ -26,6 +29,9 @@ function __init__()
   mkpath(deps())
 end
 
+include("dataloader.jl")
+export DataLoader
+
 include("mnist.jl")
 export MNIST
 
@@ -42,7 +48,11 @@ using .Sentiment
 include("iris.jl")
 export Iris
 
+<<<<<<< HEAD
 include("housing.jl")
 export Housing
 
 end
+=======
+end #module
+>>>>>>> af20a785... add DataLoader
diff --git a/src/data/dataloader.jl b/src/data/dataloader.jl
new file mode 100644
index 00000000..baf32a83
--- /dev/null
+++ b/src/data/dataloader.jl
@@ -0,0 +1,88 @@
+# Adapted from Knet's src/data.jl (author: Deniz Yuret)
+
+struct DataLoader
+    data
+    batchsize::Int
+    nobs::Int
+    partial::Bool
+    imax::Int
+    indices::Vector{Int}
+    shuffle::Bool
+end
+
+"""
+    DataLoader(data...; batchsize=1, shuffle=false, partial=true)
+
+An object that iterates over mini-batches of `data`, each mini-batch containing `batchsize` observations
+(except possibly the last one).
+
+Takes as input one or more data tensors, e.g. X in unsupervised learning, X and Y in
+supervised learning. The last dimension in each tensor is considered to be the observation
+dimension.
+
+If `shuffle=true`, shuffles the observations each time iterations are re-started.
+If `partial=false`, drops the last mini-batch if it is smaller than the batchsize.
+
+Example usage:
+
+    Xtrain = rand(10, 100)
+    dtrain = DataLoader(Xtrain, batchsize=2)
+    # iterate over 50 mini-batches
+    for x in dtrain
+        @assert size(x) == (10, 2)
+        ...
+    end
+
+    Xtrain = rand(10, 100)
+    Ytrain = rand(100)
+    dtrain = DataLoader(Xtrain, Ytrain, batchsize=2, shuffle=true)
+    for epoch in 1:100
+        for (x, y) in dtrain
+            @assert size(x) == (10, 2)
+            @assert size(y) == (2,)
+            ...
+ end + end + + # train for 10 epochs + using IterTools: ncycle + Flux.train!(loss, ps, ncycle(dtrain, 10), opt) +""" +function DataLoader(data...; batchsize=1, shuffle=false, partial=true) + length(data) > 0 || throw(ArgumentError("Need at least one data input")) + batchsize > 0 || throw(ArgumentError("Need positive batchsize")) + + nx = size(data[1])[end] + for i=2:length(data) + nx != size(data[i])[end] && throw(DimensionMismatch("All data should contain same number of observations")) + end + if nx < batchsize + @warn "Number of data points less than batchsize, decreasing the batchsize to $nx" + batchsize = nx + end + imax = partial ? nx : nx - batchsize + 1 + ids = 1:min(nx, batchsize) + DataLoader(data, batchsize, nx, partial, imax, [1:nx;], shuffle) +end + +getdata(x::AbstractArray, ids) = x[(Base.Colon() for _=1:ndims(x)-1)..., ids] + +@propagate_inbounds function Base.iterate(d::DataLoader, i=0) # returns data in d.indices[i+1:i+batchsize] + i >= d.imax && return nothing + if d.shuffle && i == 0 + shuffle!(d.indices) + end + nexti = min(i + d.batchsize, d.nobs) + ids = d.indices[i+1:nexti] + if length(d.data) == 1 + batch = getdata(d.data[1], ids) + else + batch = ((getdata(x, ids) for x in d.data)...,) + end + return (batch, nexti) +end + +function Base.length(d::DataLoader) + n = d.nobs / d.batchsize + d.partial ? ceil(Int,n) : floor(Int,n) +end \ No newline at end of file diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 59404a42..34a98394 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -61,13 +61,14 @@ end For each datapoint `d` in `data` computes the gradient of `loss(d...)` through backpropagation and calls the optimizer `opt`. +In case datapoints `d` are of array type, assumes no splatting is needed +and computes the gradient of `loss(d)`. + Takes a callback as keyword argument `cb`. For example, this will print "training" every 10 seconds: -```julia -Flux.train!(loss, params, data, opt, - cb = throttle(() -> println("training"), 10)) -``` + train!(loss, params, data, opt, + cb = throttle(() -> println("training"), 10)) The callback can call `Flux.stop()` to interrupt the training loop. @@ -78,8 +79,14 @@ function train!(loss, ps, data, opt; cb = () -> ()) cb = runall(cb) @progress for d in data try - gs = gradient(ps) do - loss(d...) + if d isa AbstractArray + gs = gradient(ps) do + loss(d) + end + else + gs = gradient(ps) do + loss(d...) 
+ end end update!(opt, ps, gs) cb() diff --git a/test/data.jl b/test/data.jl index 6c012a93..1a090174 100644 --- a/test/data.jl +++ b/test/data.jl @@ -1,28 +1,85 @@ -using Flux.Data -using Test +@testset "DataLoader" begin + X = reshape([1:10;], (2, 5)) + Y = [1:5;] -@test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args + d = DataLoader(X, batchsize=2) + batches = collect(d) + @test length(batches) == 3 + @test batches[1] == X[:,1:2] + @test batches[2] == X[:,3:4] + @test batches[3] == X[:,5:5] -@test length(CMUDict.phones()) == 39 + d = DataLoader(X, batchsize=2, partial=false) + batches = collect(d) + @test length(batches) == 2 + @test batches[1] == X[:,1:2] + @test batches[2] == X[:,3:4] -@test length(CMUDict.symbols()) == 84 + d = DataLoader(X, Y, batchsize=2) + batches = collect(d) + @test length(batches) == 3 + @test length(batches[1]) == 2 + @test length(batches[2]) == 2 + @test length(batches[3]) == 2 + @test batches[1][1] == X[:,1:2] + @test batches[1][2] == Y[1:2] + @test batches[2][1] == X[:,3:4] + @test batches[2][2] == Y[3:4] + @test batches[3][1] == X[:,5:5] + @test batches[3][2] == Y[5:5] -@test MNIST.images()[1] isa Matrix -@test MNIST.labels() isa Vector{Int64} + # test interaction with `train!` + θ = ones(2) + X = zeros(2, 10) + loss(x) = sum((x .- θ).^2) + d = DataLoader(X) + Flux.train!(loss, [θ], ncycle(d, 10), Descent(0.1)) + @test norm(θ) < 1e-4 -@test FashionMNIST.images()[1] isa Matrix -@test FashionMNIST.labels() isa Vector{Int64} + # test interaction with `train!` + θ = zeros(2) + X = ones(2, 10) + Y = fill(2, 10) + loss(x, y) = sum((y - x'*θ).^2) + d = DataLoader(X, Y) + Flux.train!(loss, [θ], ncycle(d, 10), Descent(0.1)) + @test norm(θ .- 1) < 1e-10 +end -@test Data.Sentiment.train() isa Vector{Data.Tree{Any}} +@testset "CMUDict" begin + @test cmudict()["CATASTROPHE"] == :[K,AH0,T,AE1,S,T,R,AH0,F,IY0].args -@test Iris.features() isa Matrix -@test size(Iris.features()) == (4,150) + @test length(CMUDict.phones()) == 39 -@test Iris.labels() isa Vector{String} -@test size(Iris.labels()) == (150,) + @test length(CMUDict.symbols()) == 84 +end -@test Housing.features() isa Matrix -@test size(Housing.features()) == (506, 13) +@testset "MNIST" begin + @test MNIST.images()[1] isa Matrix + @test MNIST.labels() isa Vector{Int64} +end -@test Housing.targets() isa Array{Float64} -@test size(Housing.targets()) == (506, 1) +@testset "FashionMNIST" begin + @test FashionMNIST.images()[1] isa Matrix + @test FashionMNIST.labels() isa Vector{Int64} +end + +@testset "Sentiment" begin + @test Data.Sentiment.train() isa Vector{Data.Tree{Any}} +end + +@testset "Iris" begin + @test Iris.features() isa Matrix + @test size(Iris.features()) == (4,150) + + @test Iris.labels() isa Vector{String} + @test size(Iris.labels()) == (150,) +end + +@testest "Housing" begin + @test Housing.features() isa Matrix + @test size(Housing.features()) == (506, 13) + + @test Housing.targets() isa Array{Float64} + @test size(Housing.targets()) == (506, 1) +end diff --git a/test/runtests.jl b/test/runtests.jl index 1505e96a..81182f0d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,32 +1,49 @@ -using Flux, Test, Random, Statistics, Documenter -using Random +using Flux +using Flux.Data +using Test +using Random, Statistics, LinearAlgebra +using Documenter +using IterTools: ncycle Random.seed!(0) @testset "Flux" begin -@info "Testing Basics" + @testset "Utils" begin + include("utils.jl") + end -include("utils.jl") -include("onehot.jl") -include("optimise.jl") -include("data.jl") + 
@testset "Onehot" begin + include("onehot.jl") + end -@info "Testing Layers" + @testset "Optimise" begin + include("optimise.jl") + end -include("layers/basic.jl") -include("layers/normalisation.jl") -include("layers/stateless.jl") -include("layers/conv.jl") + @testset "Data" begin + include("data.jl") + end -if Flux.use_cuda[] - include("cuda/cuda.jl") -else - @warn "CUDA unavailable, not testing GPU support" -end + @testset "Layers" begin + include("layers/basic.jl") + include("layers/normalisation.jl") + include("layers/stateless.jl") + include("layers/conv.jl") + end -if VERSION >= v"1.2" - doctest(Flux) -end + @testset "CUDA" begin + if Flux.use_cuda[] + include("cuda/cuda.jl") + else + @warn "CUDA unavailable, not testing GPU support" + end + end -end + @testset "Docs" begin + if VERSION >= v"1.2" + doctest(Flux) + end + end + +end # testset Flux From 487002878ed530303cf9527e7cca0ea57b34d5b2 Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Thu, 27 Feb 2020 20:49:05 +0100 Subject: [PATCH 20/23] restrict train! special casing --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 34a98394..54b7f53a 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -79,7 +79,7 @@ function train!(loss, ps, data, opt; cb = () -> ()) cb = runall(cb) @progress for d in data try - if d isa AbstractArray + if d isa AbstractArray{<:Number} gs = gradient(ps) do loss(d) end From 97141e8c98fc94feadbe287f45a32b58bd3d515c Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Thu, 27 Feb 2020 20:49:55 +0100 Subject: [PATCH 21/23] improve docstring --- src/optimise/train.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimise/train.jl b/src/optimise/train.jl index 54b7f53a..79ebcc06 100644 --- a/src/optimise/train.jl +++ b/src/optimise/train.jl @@ -61,7 +61,7 @@ end For each datapoint `d` in `data` computes the gradient of `loss(d...)` through backpropagation and calls the optimizer `opt`. -In case datapoints `d` are of array type, assumes no splatting is needed +In case datapoints `d` are of numeric array type, assumes no splatting is needed and computes the gradient of `loss(d)`. Takes a callback as keyword argument `cb`. For example, this will print "training" From a72258ea2a428ce4b12e711395856091f17f9fcc Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Sat, 29 Feb 2020 18:55:49 +0100 Subject: [PATCH 22/23] fix rebase --- src/data/Data.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/data/Data.jl b/src/data/Data.jl index 940b7ea7..16a025a7 100644 --- a/src/data/Data.jl +++ b/src/data/Data.jl @@ -48,11 +48,7 @@ using .Sentiment include("iris.jl") export Iris -<<<<<<< HEAD include("housing.jl") export Housing end -======= -end #module ->>>>>>> af20a785... add DataLoader From a1efc434c21d2e4026e5d4f8764854451bac88c5 Mon Sep 17 00:00:00 2001 From: CarloLucibello Date: Sat, 29 Feb 2020 19:40:44 +0100 Subject: [PATCH 23/23] fix typo --- test/data.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/data.jl b/test/data.jl index 1a090174..c7a8fdfd 100644 --- a/test/data.jl +++ b/test/data.jl @@ -76,7 +76,7 @@ end @test size(Iris.labels()) == (150,) end -@testest "Housing" begin +@testset "Housing" begin @test Housing.features() isa Matrix @test size(Housing.features()) == (506, 13)