diff --git a/src/Flux.jl b/src/Flux.jl index 44864589..fdcd6194 100644 --- a/src/Flux.jl +++ b/src/Flux.jl @@ -19,11 +19,7 @@ include("params.jl") include("compiler/Compiler.jl") using .Compiler: @net -include("layers/chain.jl") -include("layers/affine.jl") -include("layers/activation.jl") -include("layers/cost.jl") - -include("data.jl") +include("layers/stateless.jl") +include("layers/basic.jl") end # module diff --git a/src/data.jl b/src/data.jl deleted file mode 100644 index a50be7a4..00000000 --- a/src/data.jl +++ /dev/null @@ -1,23 +0,0 @@ -""" - onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false] - - onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.] - -Produce a one-hot-encoded version of an item, given a list of possible values -for the item. -""" -onehot(T::Type, label, labels) = T[i == label for i in labels] -onehot(label, labels) = onehot(Int, label, labels) - -""" - onecold([0.0, 1.0, 0.0, ...], - ['a', 'b', 'c', ...]) => 'b' - -The inverse of `onehot`; takes an output prediction vector and a list of -possible values, and produces the appropriate value. -""" -onecold(y::AbstractVector, labels = 1:length(y)) = - labels[findfirst(y, maximum(y))] - -onecold(y::AbstractMatrix, l...) = - squeeze(mapslices(y -> onecold(y, l...), y, 2), 2) diff --git a/src/layers/activation.jl b/src/layers/activation.jl deleted file mode 100644 index 5b1a27e2..00000000 --- a/src/layers/activation.jl +++ /dev/null @@ -1,11 +0,0 @@ -# Sigmoid -σ(x) = 1 ./ (1 + exp.(-x)) -# back!(::typeof(σ), Δ, x) = Δ .* σ(x).*(1.-σ(x)) - -# Rectified Linear Unit -relu(x) = max(0, x) -# back!(::typeof(relu), Δ, x) = Δ .* (x .> 0) - -softmax(xs) = exp.(xs) ./ sum(exp.(xs), 2) - -flatten(xs) = reshape(xs, size(xs, 1), :) diff --git a/src/layers/affine.jl b/src/layers/affine.jl deleted file mode 100644 index e7cc862c..00000000 --- a/src/layers/affine.jl +++ /dev/null @@ -1,8 +0,0 @@ -@net type Affine - W - b - x -> x*W .+ b -end - -Affine(in::Integer, out::Integer; init = initn) = - Affine(init(in, out), init(1, out)) diff --git a/src/layers/chain.jl b/src/layers/basic.jl similarity index 68% rename from src/layers/chain.jl rename to src/layers/basic.jl index 0003812c..a866cf4c 100644 --- a/src/layers/chain.jl +++ b/src/layers/basic.jl @@ -1,3 +1,5 @@ +# Chain + type Chain layers::Vector{Any} Chain(xs...) = new([xs...]) @@ -12,3 +14,16 @@ Compiler.graph(s::Chain) = foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers) Base.getindex(c::Chain, i::AbstractArray) = Chain(c.layers[i]...) + +# Affine + +struct Affine{S,T} + W::S + b::T +end + +Affine(in::Integer, out::Integer; init = initn) = + Affine(track(init(out, in)), + track(init(out))) + +(a::Affine)(x) = a.W*x .+ a.b diff --git a/src/layers/cost.jl b/src/layers/cost.jl deleted file mode 100644 index 99c9381a..00000000 --- a/src/layers/cost.jl +++ /dev/null @@ -1,7 +0,0 @@ -mse(ŷ, y) = sumabs2(ŷ .- y)/2 - -# back!(::typeof(mse), Δ, ŷ, y) = Δ .* (ŷ .- y) - -logloss(ŷ, y) = -sum(y .* log.(ŷ)) - -# back!(::typeof(logloss), Δ, ŷ, y) = 0 .- Δ .* y ./ ŷ diff --git a/src/layers/stateless.jl b/src/layers/stateless.jl new file mode 100644 index 00000000..ed871793 --- /dev/null +++ b/src/layers/stateless.jl @@ -0,0 +1,17 @@ +# Activation Functions + +σ(x) = 1 ./ (1 + exp.(-x)) +# back!(::typeof(σ), Δ, x) = Δ .* σ(x).*(1.-σ(x)) + +relu(x) = max(0, x) +# back!(::typeof(relu), Δ, x) = Δ .* (x .> 0) + +softmax(xs) = exp.(xs) ./ sum(exp.(xs), 2) + +# Cost functions + +mse(ŷ, y) = sumabs2(ŷ .- y)/2 +# back!(::typeof(mse), Δ, ŷ, y) = Δ .* (ŷ .- y) + +logloss(ŷ, y) = -sum(y .* log.(ŷ)) +# back!(::typeof(logloss), Δ, ŷ, y) = 0 .- Δ .* y ./ ŷ diff --git a/src/utils.jl b/src/utils.jl index 958d04e4..e2a4a050 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,5 +1,3 @@ -call(f, xs...) = f(xs...) - # Arrays initn(dims...) = randn(dims...)/100 @@ -10,8 +8,31 @@ squeeze(xs, dim = 1) = Base.squeeze(xs, dim) stack(xs, dim = 1) = cat(dim, unsqueeze.(xs, dim)...) unstack(xs, dim = 1) = [slicedim(xs, dim, i) for i = 1:size(xs, dim)] -convertel(T::Type, xs::AbstractArray) = convert.(T, xs) -convertel{T}(::Type{T}, xs::AbstractArray{T}) = xs +""" + onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false] + + onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.] + +Produce a one-hot-encoded version of an item, given a list of possible values +for the item. +""" +onehot(T::Type, label, labels) = T[i == label for i in labels] +onehot(label, labels) = onehot(Int, label, labels) + +""" + onecold([0.0, 1.0, 0.0, ...], + ['a', 'b', 'c', ...]) => 'b' + +The inverse of `onehot`; takes an output prediction vector and a list of +possible values, and produces the appropriate value. +""" +onecold(y::AbstractVector, labels = 1:length(y)) = + labels[findfirst(y, maximum(y))] + +onecold(y::AbstractMatrix, l...) = + squeeze(mapslices(y -> onecold(y, l...), y, 2), 2) + +flatten(xs) = reshape(xs, size(xs, 1), :) a ∘ b = a .* b