diff --git a/src/Flux.jl b/src/Flux.jl
index 3904f52f..39e653a5 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -7,7 +7,6 @@ using MacroTools, Lazy, Flow
 export Model, back!, update!
 
 abstract Model
-abstract Activation <: Model
 
 back!(m::Model, ∇) = error("Backprop not implemented for $(typeof(m))")
 update!(m::Model, η) = m
diff --git a/src/activation.jl b/src/activation.jl
index a9832c7e..0bc6f466 100644
--- a/src/activation.jl
+++ b/src/activation.jl
@@ -1,28 +1 @@
-export Sigmoid
-
-σ(x) = 1/(1+exp(-x))
-σ′(x) = σ(x)*(1-σ(x))
-
-∇₁(::typeof(σ)) = σ′
-
-type Sigmoid <: Activation
-  in::Vector{Float32}
-  out::Vector{Float32}
-  ∇in::Vector{Float32}
-end
-
-Sigmoid(size::Integer) = Sigmoid(zeros(size), zeros(size), zeros(size))
-
-function (l::Sigmoid)(x)
-  l.in = x
-  map!(σ, l.out, x)
-end
-
-function back!(l::Sigmoid, ∇)
-  map!(σ′, l.∇in, l.in)
-  map!(*, l.∇in, l.∇in, ∇)
-end
-
-shape(l::Sigmoid) = length(l.in)
-
-Sigmoid() = Init(in -> Sigmoid(in[1]))
+abstract Activation <: Model
diff --git a/src/cost.jl b/src/cost.jl
index 62772894..e69de29b 100644
--- a/src/cost.jl
+++ b/src/cost.jl
@@ -1,8 +0,0 @@
-export mse, mse!
-
-function mse!(∇, pred, target)
-  map!(-, ∇, pred, target)
-  sumabs2(∇)/2
-end
-
-mse(pred, target) = mse(similar(pred), pred, target)
diff --git a/src/layers/dense.jl b/src/layers/dense.jl
index deb2ce93..e69de29b 100644
--- a/src/layers/dense.jl
+++ b/src/layers/dense.jl
@@ -1,41 +0,0 @@
-export Dense
-
-type Dense <: Model
-  W::Matrix{Float32}
-  b::Vector{Float32}
-  ∇W::Matrix{Float32}
-  ∇b::Vector{Float32}
-
-  in::Vector{Float32}
-  out::Vector{Float32}
-  ∇in::Vector{Float32}
-end
-
-Dense(in::Integer, out::Integer) =
-  Dense(randn(out, in), randn(out),
-        zeros(out, in), zeros(out),
-        zeros(in), zeros(out), zeros(in))
-
-Dense(out::Integer) = Init(in -> Dense(in[1], out))
-
-function (l::Dense)(x)
-  l.in = x
-  A_mul_B!(l.out, l.W, x)
-  map!(+, l.out, l.out, l.b)
-end
-
-function back!(l::Dense, ∇)
-  map!(+, l.∇b, l.∇b, ∇)
-  # l.∇W += ∇ * l.in'
-  BLAS.gemm!('N', 'T', eltype(∇)(1), ∇, l.in, eltype(∇)(1), l.∇W)
-  At_mul_B!(l.∇in, l.W, ∇)
-end
-
-function update!(l::Dense, η)
-  map!((x, ∇x) -> x - η*∇x, l.W, l.W, l.∇W)
-  map!((x, ∇x) -> x - η*∇x, l.b, l.b, l.∇b)
-  fill!(l.∇W, 0)
-  fill!(l.∇b, 0)
-end
-
-shape(d::Dense) = size(d.b)
diff --git a/src/layers/input.jl b/src/layers/input.jl
index b47dc6cf..58272e34 100644
--- a/src/layers/input.jl
+++ b/src/layers/input.jl
@@ -10,7 +10,7 @@ type Input{N} <: Model
   dims::Dims{N}
 end
 
-Input(i) = Input(dims(i))
+Input(i...) = Input(dims(i...))
 
 (::Input)(x) = x
 back!(::Input, ∇) = ∇
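The `input.jl` hunk above swaps the one-argument constructor for a varargs one, so an input's shape can be declared dimension by dimension. A minimal sketch of the effect, assuming (as a stand-in for the package's real `dims` helper) that `dims` simply collects integer sizes into a `Dims` tuple:

```julia
# Hypothetical stand-in for the package's `dims` helper:
# collect integer sizes into an NTuple{N,Int} (i.e. a Dims{N}).
dims(i...) = map(Int, i)

abstract Model

type Input{N} <: Model
  dims::Dims{N}
end

# The new varargs constructor from the diff.
Input(i...) = Input(dims(i...))

Input(10)     # old behaviour still works: Input{1}((10,))
Input(28, 28) # newly possible: Input{2}((28, 28))
```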
diff --git a/src/layers/sequence.jl b/src/layers/sequence.jl
index fc1c4f6c..8297af77 100644
--- a/src/layers/sequence.jl
+++ b/src/layers/sequence.jl
@@ -1,6 +1,6 @@
 export Sequence
 
-type Sequence
+type Sequence <: Model
   layers::Vector{Model}
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index 57978e75..7ce83918 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -2,26 +2,3 @@ export onehot, onecold
 
 onehot(label, labels) = [i == label for i in labels]
 onecold(pred, labels = 1:length(pred)) = labels[findfirst(pred, maximum(pred))]
-
-function train!(m::Model, train, test = []; epoch = 1, batch = 10, η = 0.1)
-  i = 0
-  ∇ = zeros(length(train[1][2]))
-  for _ in 1:epoch
-    for (x, y) in shuffle!(train)
-      i += 1
-      err = mse!(∇, m(x), y)
-      back!(m, ∇)
-      i % batch == 0 && update!(m, η/batch)
-    end
-    @show accuracy(m, test)
-  end
-  return m
-end
-
-function accuracy(m::Model, data)
-  correct = 0
-  for (x, y) in data
-    onecold(m(x)) == onecold(y) && (correct += 1)
-  end
-  return correct/length(data)
-end
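One detail worth flagging in the deleted `cost.jl`: the `mse` convenience wrapper called `mse` rather than `mse!`, so it would have recursed forever. A corrected sketch of those removed helpers, in the same 0.5-era Julia as the deleted code:

```julia
export mse, mse!

# In-place mean-squared error: writes pred - target into ∇
# and returns half the sum of squared residuals.
function mse!(∇, pred, target)
  map!(-, ∇, pred, target)
  sumabs2(∇)/2
end

# Allocating wrapper. It must call mse!, not mse,
# otherwise the method never terminates.
mse(pred, target) = mse!(similar(pred), pred, target)
```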