remove all
This commit is contained in:
parent 1010b18084
commit dcbc77980e
.gitignore (vendored), 5 lines deleted
@@ -1,5 +0,0 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
docs/build/
docs/site/
.travis.yml, 17 lines deleted
@@ -1,17 +0,0 @@
# Documentation: http://docs.travis-ci.com/user/languages/julia/
language: julia
os:
  - linux
  - osx
julia:
  - 0.5
notifications:
  email: false
# uncomment the following lines to override the default test script
script:
  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
  - julia -e 'Pkg.clone("https://github.com/MikeInnes/DataFlow.jl")'
  - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)'
# after_success:
  - julia -e 'Pkg.add("Documenter")'
  - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))'
LICENSE.md, 22 lines deleted
@@ -1,22 +0,0 @@
The Flux.jl package is licensed under the MIT "Expat" License:

> Copyright (c) 2016: Mike Innes.
>
> Permission is hereby granted, free of charge, to any person obtaining
> a copy of this software and associated documentation files (the
> "Software"), to deal in the Software without restriction, including
> without limitation the rights to use, copy, modify, merge, publish,
> distribute, sublicense, and/or sell copies of the Software, and to
> permit persons to whom the Software is furnished to do so, subject to
> the following conditions:
>
> The above copyright notice and this permission notice shall be
> included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md, 50 lines deleted
@@ -1,50 +0,0 @@
# Флукс

[](https://travis-ci.org/MikeInnes/Flux.jl)

Flux is a high-level API for machine learning, implemented in Julia.

Flux aims to provide a concise and expressive syntax for architectures that are hard to express within other frameworks. The notation should be familiar and extremely close to what you'd find in a paper or description of the model.

The current focus is on ANNs with TensorFlow or MXNet as a backend. While it's in a very early working-prototype stage, you can see what works so far in the [examples folder](/examples).

## Brief Examples

Simple multi-layer-perceptron for MNIST:

```julia
Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)
```
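
Editor's note, not part of the original README: the MNIST example deleted later in this same commit drives such a model roughly as sketched below, where `tf` compiles it to the TensorFlow backend and `Flux.train!` runs the built-in training loop; `train` and `test` are assumed to be collections of `(input, one-hot target)` pairs.

```julia
m = Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

model = tf(m)                              # compile to the TensorFlow backend
model(first(train)[1])                     # a single prediction, pre-training
Flux.train!(model, train, test, η = 1e-3)  # stochastic gradient descent with learning rate η
```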

LSTM example:

```julia
@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input  = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state  = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)
```
docs/make.jl, 17 lines deleted
@@ -1,17 +0,0 @@
using Documenter, Flux

makedocs(modules=Module[Flux],
         doctest=false, clean=true,
         format = :html,
         sitename="Flux Documentation",
         pages = [
           "Home" => "index.md",
         ])

deploydocs(
  repo = "github.com/MikeInnes/Flux.jl.git",
  target = "build",
  osname = "linux",
  julia = "0.5",
  deps = nothing,
  make = nothing)
@@ -1 +0,0 @@
# Flux
@@ -1,22 +0,0 @@
using Flux, MNIST

data = [(Vector{Float32}(trainfeatures(i)), onehot(Float32, trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]

m = Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

# Convert to TensorFlow
model = tf(m)

# An example prediction pre-training
model(data[1][1])

@time Flux.train!(model, train, test, η = 1e-3)

# An example prediction post-training
model(data[1][1])
@@ -1,26 +0,0 @@
using Flux

# Traditional Approach

# 100 samples of sequences of 15 28×28 3-colour images
rand(100, 15, 28, 28, 3)

# Basic Batching

data = Batch([collect(reshape(9(i-1):9i-1, 3, 3)) for i = 1:10])

Batch(flatten.(data))

data |> structure

Batch(flatten.(data)) |> structure

# Nested Batching

# DNA sequence, encoded as a list of [A, T, G, C]
x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]])
x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]])

data = Batch([x1, x2])

data |> structure
@@ -1,38 +0,0 @@
using Flux
import StatsBase: wsample

nunroll = 50
nbatch = 50

getseqs(chars, alphabet) = sequences((onehot(Float32, char, alphabet) for char in chars), nunroll)
getbatches(chars, alphabet) = batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...)

input = readstring("$(homedir())/Downloads/shakespeare_input.txt")
alphabet = unique(input)
N = length(alphabet)

Xs, Ys = getbatches(input, alphabet), getbatches(input[2:end], alphabet)

model = Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)

m = tf(unroll(model, nunroll))

@time Flux.train!(m, Xs, Ys, η = 0.1, epoch = 1)

string(map(c -> onecold(c, alphabet), m(first(first(Xs))))...)

function sample(model, n, temp = 1)
  s = [rand(alphabet)]
  m = tf(unroll(model, 1))
  for i = 1:n
    push!(s, wsample(alphabet, softmax(m(Seq((onehot(Float32, s[end], alphabet),)))[1]./temp)))
  end
  return string(s...)
end

sample(model, 100)
@@ -1,70 +0,0 @@
using Flux, Juno

# Flux aims to provide high-level APIs that work well across backends, but in
# some cases you may want to take advantage of features specific to a given
# backend (or Flux may simply not have an implementation of that feature yet).
# In these cases it's easy to "drop down" and use the backend's API directly,
# where appropriate.

# In this example, both things are happening; firstly, Flux doesn't yet support
# ConvNets in the pure-Julia backend, but this is invisible thanks to the use of
# a simple "shim" type, `Conv2D`. This is provided by the library but could easily
# have been user-defined.

# Secondly, we want to take advantage of TensorFlow.jl's training process and
# optimisers. We can simply call `Tensor` exactly as we would on a regular
# TensorFlow model, and the rest of the process trivially follows
# TensorFlow.jl's usual API.

conv1 = Chain(
  Reshape(28,28,1),
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Input(12,12,20),
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = Chain(
  conv1, conv2, flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain TensorFlow, following
# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl
# (taking only the training and cost logic, not the graph building steps)

using TensorFlow, Distributions

include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl"))
loader = DataLoader()

session = Session(Graph())

x = placeholder(Float32)
y′ = placeholder(Float32)
y = Tensor(lenet, x)

cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2]))

train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy)

accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32))

run(session, initialize_all_variables())

@progress for i in 1:1000
  batch = next_batch(loader, 50)
  if i%100 == 1
    train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2]))
    info("step $i, training accuracy $train_accuracy")
  end
  run(session, train_step, Dict(x=>batch[1], y′=>batch[2]))
end

testx, testy = load_test_set()
test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy))
info("test accuracy $test_accuracy")
@@ -1,52 +0,0 @@
# Based on https://arxiv.org/abs/1409.0473

using Flux
using Flux: flip

# A recurrent model which takes a token and returns a context-dependent
# annotation.

@net type Encoder
  forward
  backward
  token -> hcat(forward(token), backward(token))
end

Encoder(in::Integer, out::Integer) =
  Encoder(LSTM(in, out÷2), flip(LSTM(in, out÷2)))

# A recurrent model which takes a sequence of annotations, attends, and returns
# a predicted output token.

@net type Decoder
  attend
  recur
  state; y; N
  function (anns)
    energies = map(ann -> exp(attend(hcat(state{-1}, ann))[1]), seq(anns, N))
    weights = energies./sum(energies)
    ctx = sum(map((α, ann) -> α .* ann, weights, anns))
    (_, state), y = recur((state{-1}, y{-1}), ctx)
    y
  end
end

Decoder(in::Integer, out::Integer; N = 1) =
  Decoder(Affine(in+out, 1),
          unroll1(LSTM(in, out)),
          param(zeros(1, out)), param(zeros(1, out)), N)

# The model

Nalpha  = 5  # The size of the input token vector
Nphrase = 7  # The length of (padded) phrases
Nhidden = 12 # The size of the hidden state

encode = Encoder(Nalpha, Nhidden)
decode = Chain(Decoder(Nhidden, Nhidden, N = Nphrase), Affine(Nhidden, Nalpha), softmax)

model = Chain(
  unroll(encode, Nphrase, stateful = false),
  unroll(decode, Nphrase, stateful = false, seq = false))

xs = Batch([Seq(rand(Float32, Nalpha) for _ = 1:Nphrase)])
src/Flux.jl, 36 lines deleted
@@ -1,36 +0,0 @@
module Flux

using MacroTools, Lazy, DataFlow, Juno
using DataFlow: graphm, syntax, prewalk!, postwalk!, prewalk, postwalk,
  iscyclic, Constant, constant, isconstant, group, Split, splitnode,
  detuple, value, inputs, thread!, value, inputs, Split, splitnode, inputnode,
  spliceinputs, bumpinputs, Frame
using Juno: Tree, Row

# Zero Flux Given

include("model.jl")
include("utils.jl")
include("data.jl")

include("compiler/code.jl")
include("compiler/loops.jl")
include("compiler/interp.jl")
include("compiler/shape.jl")

include("layers/affine.jl")
include("layers/activation.jl")
include("layers/recurrent.jl")
include("layers/shape.jl")
include("layers/chain.jl")
include("layers/shims.jl")

include("dims/catmat.jl")
include("dims/batching.jl")
include("dims/seq.jl")

include("cost.jl")

include("backend/backend.jl")

end # module
@@ -1,11 +0,0 @@
export tf

function loadtf()
  isdefined(Flux, :TF) && return
  @eval include(joinpath(dirname($@__FILE__), "tensorflow/tensorflow.jl"))
end

function tf(args...)
  loadtf()
  TF.tf(args...)
end
@@ -1,72 +0,0 @@
using Base: @get!
using DataFlow: Constant, constant, Context, interpret, Split,
  interpv, ituple, ilambda, iconst, iline, stack, mux
using Flux: imap
using TensorFlow: RawTensor

# TODO: implement Julia's type promotion rules

node(x::Tuple) = map(node, x)
node(x::Tensor) = x
node(x::Variable) = x
node(x::Number) = TensorFlow.constant(Float32(x))

graph(::typeof(tuple), args...) = (args...,)
graph(s::Split, t::Tuple) = t[s.n]
graph(::typeof(softmax), x) = nn.softmax(x)
graph(::typeof(relu), x) = nn.relu(x)
graph(::typeof(σ), x) = nn.sigmoid(x)
graph(::typeof(hcat), xs...) = concat(1, xs)
graph(::typeof(seq), xs, n) = TensorFlow.unpack(xs, num = n, axis = 1)

for op in (tanh, *, .*, +, -)
  @eval graph(::typeof($op), args...) = $op(node(args)...)
end

graph(::typeof(.-), args...) = -(node(args)...)

# reshape hack due to https://github.com/malmaud/TensorFlow.jl/issues/79
batchsize(x::Tensor) = reduce_sum(slice(TensorFlow.shape(x), [0], [1]))
graph(::typeof(flatten), x) = reshape(x, pack([batchsize(x), Int32(-1)]))
graph(r::Reshape, x) = reshape(x, pack([batchsize(x), map(Int32, r.dims)...]))

graph(::Input, x) = x

graph(p::MaxPool, x) =
  nn.max_pool(x, [1, p.size..., 1], [1, p.stride..., 1], "VALID")

graph(op::Op, xs...) = op.f(xs...)

function graph(ctx::Context, model, args...)
  node = graph(model, interpv(ctx, args)...)
  isa(node, Tensor) && (ctx[:stacks][node.op.name] = stack(ctx))
  return node
end

interp(ctx, c::Conv2D, x) =
  nn.conv2d(interpv(ctx, x), interp(ctx, Constant(c.filter)), [1,c.stride...,1], "VALID")

interp{T<:AArray}(ctx, p::Constant{Flux.Param{T}}) =
  haskey(ctx[:params], p.value) ?
    ctx[:params][p.value] :
    (ctx[:params][p.value] = Variable(p.value.x))

interp(ctx, p::Constant) = p.value

function interp(ctx, model, args...)
  g = Flux.graph(model)
  g == nothing && return graph(ctx, model, args...)
  DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.")
  interpret(ctx, g, interpv(ctx, args)...)
end

function tograph(model, args...)
  ctx = Context(mux(iline, ilambda, ituple, imap, interp),
                params = ObjectIdDict(), stacks = Dict())
  out = interp(ctx, model, map(constant, args)...)
  return ctx[:params], ctx[:stacks], out
end

# `tograph` returns (params, stacks, output); index 3 is the output tensor.
TensorFlow.Tensor(m::Flux.Model, args...) = tograph(m, args...)[3]

RawTensor(data::Union{Batch,Seq}) = RawTensor(rawbatch(data))
@@ -1,98 +0,0 @@
type Model
  model::Any
  session::Session
  params::Dict{Flux.Param,Tensor}
  stacks::Dict
  inputs::Vector{Tensor}
  output::Any
end

function makesession(model, inputs; session = Session(Graph()))
  params, stacks, output = tograph(model, inputs...)
  run(session, initialize_all_variables())
  Model(model, session, params, stacks, inputs, output)
end

function makesession(model, n::Integer; session = Session(Graph()))
  makesession(model, [placeholder(Float32) for _ = 1:n], session = session)
end

tf(model) = makesession(model, 1)

function storeparams!(sess, params)
  for (p, t) in params
    p.x = run(sess, t)
  end
end

storeparams!(m::Model) = storeparams!(m.session, m.params)

ismultioutput(m::Model) = !isa(m.output, Tensor)

function batch(xs)
  dims = ndims(xs)-1
  T = Array{eltype(xs),dims}
  B = Array{eltype(xs),dims+1}
  Batch{T,B}(xs)
end

function tferr(model::Model, e)
  m = match(r"Node: ([\w\d]+) =", string(e.status))
  m == nothing && return
  node = m.captures[1]
  if haskey(model.stacks, node)
    stk = model.stacks[node]
    println("TensorFlow error occurred at:")
    foreach(l -> println("$(l.file):$(l.line)"), stk)
  end
end

function runmodel(m::Model, args...)
  @assert length(args) == length(m.inputs)
  try
    output = run(m.session, m.output, Dict(zip(m.inputs, args)))
    ismultioutput(m) ? (batch.(output)...,) : batch(output)
  catch e
    isa(e, TensorFlow.TFException) || rethrow(e)
    tferr(m, e)
    rethrow(e)
  end
end

function (m::Model)(args::Batch...)
  runmodel(m, args...)
end

function (m::Model)(args...)
  output = m(map(batchone, args)...)
  ismultioutput(m) ? map(first, output) : first(output)
end

for f in :[back!, update!].args
  @eval function Flux.$f(m::Model, args...)
    error($(string(f)) * " is not yet supported on TensorFlow models")
  end
end

import Juno: info

function Flux.train!(m::Model, train, test=[]; epoch = 1, η = 0.1,
                     loss = (y, y′) -> reduce_sum((y - y′).^2)/2,
                     opt = TensorFlow.train.GradientDescentOptimizer(η))
  i = 0
  Y = placeholder(Float32)
  Loss = loss(m.output, Y)
  minimize_op = TensorFlow.train.minimize(opt, Loss)
  for e in 1:epoch
    info("Epoch $e\n")
    @progress for (x, y) in train
      y, cur_loss, _ = run(m.session, vcat(m.output, Loss, minimize_op),
                           Dict(m.inputs[1]=>batchone(x), Y=>batchone(y)))
      if i % 5000 == 0
        @show y
        @show accuracy(m, test)
      end
      i += 1
    end
  end
end
@@ -1,83 +0,0 @@
# TODO: refactor, some of this is more general than just the TF backend

type SeqModel
  m::Model
  state::Any
end

cgroup(xs...) = DataFlow.group(map(constant, xs)...)

function makesession(model::Flux.Unrolled)
  sess = Session(Graph())
  input = placeholder(Float32)
  inputs = TensorFlow.unpack(input, num = model.steps, axis = 1)
  let params, stacks, outputs, instates, outstates
    if model.stateful
      instates = [placeholder(Float32) for _ in model.state]
      params, stacks, (outstates, outputs) = tograph(model, cgroup(instates...), cgroup(inputs...))
    else
      params, stacks, outputs = tograph(model, cgroup(inputs...))
    end
    output = TensorFlow.pack(outputs, axis = 1)
    run(sess, initialize_all_variables())
    sess, params, stacks, (instates, input), (outstates, output)
  end
end

function tf(model::Flux.Unrolled)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(model)
  SeqModel(
    Model(model, sess, params, stacks,
          [instates..., input], [outstates..., output]),
    model.state)
end

function batchseq(xs)
  dims = ndims(xs)-2
  T = Array{eltype(xs),dims}
  S = Array{eltype(xs),dims+1}
  B = Array{eltype(xs),dims+2}
  Batch{Seq{T,S},B}(xs)
end

batchseq(xs::Batch) = batchseq(rawbatch(xs))

TensorFlow.get_tensors(x::Tuple) = TensorFlow.get_tensors(collect(x))

function (m::SeqModel)(x::BatchSeq)
  m.m.model.stateful || return batchseq(runmodel(m.m, x)[end])
  if isempty(m.state) || length(first(m.state)) ≠ length(x)
    m.state = batchone.(m.m.model.state)
  end
  output = runmodel(m.m, m.state..., x)
  m.state, output = output[1:end-1], output[end]
  return batchseq(output)
end

(m::SeqModel)(x::Seq) = first(m(batchone(x)))

function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1,
                     loss = (y, ŷ) -> -reduce_sum(y .* log(ŷ)),
                     opt = () -> TensorFlow.train.GradientDescentOptimizer(η))
  batchlen, seqlen = length(first(Xs)), length(first(Xs)[1])
  state = batchone.(m.m.model.state)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(m.m.model)
  Y = placeholder(Float32)
  Loss = loss(Y, output)/batchlen/seqlen
  minimize_op = TensorFlow.train.minimize(opt(), Loss)
  @progress "training" for e in 1:epoch
    info("Epoch $e\n")
    @progress "epoch" for (i, (x, y)) in enumerate(zip(Xs,Ys))
      out = run(sess, vcat(outstates..., output, Loss, minimize_op),
                merge(Dict(input=>batchone(x), Y=>batchone(y)),
                      Dict(zip(instates, state))))
      state = out[1:length(state)]
      loss = out[end-1]
      isnan(loss) && error("Loss is NaN")
      isinf(loss) && error("Loss is Inf")
      (i-1) % 10 == 0 && @show loss
    end
  end
  storeparams!(sess, params)
  return
end
@@ -1,21 +0,0 @@
module TF

using ..Flux, DataFlow, TensorFlow, Juno
import Flux: accuracy

export tf

type Op
  f
  shape
end

Op(f) = Op(f, (d...) -> nothing)

Flux.shape(op::Op, d...) = op.shape(d...)

include("graph.jl")
include("model.jl")
include("recurrent.jl")

end
@@ -1,81 +0,0 @@
import DataFlow: mapconst, cse

export @net, @ml

function process_func(ex, params = [])
  @capture(shortdef(ex), (args__,) -> body_)
  body = @> body MacroTools.flatten liftloops graphm DataFlow.il
  body = mapconst(x -> x in params ? :(self.$x) : x, body)
  return args, body
end

function makegraph(graph, args)
  @assert length(args) == 1
  graph = prewalk(graph) do v
    isa(value(v), Constant) && value(v).value == args[1] ?
      inputnode(1) :
      v
  end
  graph = map(graph) do x
    isa(x, Offset) ?
      :(Flux.Offset($(Expr(:quote, x.name)), $(x.n), self.$(x.name))) :
      x
  end
  vertex(:(Flux.Frame(self)), graph)
end

function build_type(T, params)
  @esc T
  ex = quote
    type $T <: Model
      $(params...)
    end
  end
  if any(x->isexpr(x, Symbol), params)
    push!(ex.args,
          :($T($(map(x->isexpr(x, Symbol) ? :($x::AArray) : x, params)...)) =
              $T($(map(x->isexpr(x, Symbol) ? :(param($x)) : namify(x), params)...))))
  end
  ex
end

import Lazy: groupby

reifyparams(v::IVertex) = mapconst(x -> isa(x, Param) ? x.x : x, v)

function process_type(ex)
  @capture(ex, type T_ fs__ end)
  @destruct [params = false || [],
             funcs = true || []] = groupby(x->isexpr(x, :->, :function), fs)
  @assert length(funcs) == 1
  pnames = namify.(params)
  args, body = process_func(funcs[1], pnames)
  @assert length(args) == 1
  self = esc(:self)
  quote
    $(build_type(T, params))
    $(esc(:(self::$T)))($(args...),) = interpret(reifyparams(graph($self)), $(args...))
    $(esc(:(Flux.update!(self::$T, η)))) = ($(map(p -> :(update!($self.$p, η)), pnames)...);)
    $(esc(:(Flux.graph(self::$T)))) = $(DataFlow.constructor(mapconst(esc, makegraph(body, args))))
    nothing
  end
end

macro net(ex)
  isexpr(ex, :type) ? process_type(ex) :
  isexpr(ex, :->, :function) ? error("@net functions not implemented") :
  error("Unsupported model expression $ex")
end

function process_anon(ex)
  args, body = process_func(ex)
  @assert length(args) == 1
  :(Flux.Capacitor($(DataFlow.constructor(mapconst(esc, makegraph(body, args))))))
end

macro ml(ex)
  @capture(shortdef(ex), ((xs__,) -> body_ ) | (f_(xs__,) = body_)) ||
    error("@ml requires a function definition")
  ex = process_anon(:($(xs...,) -> $body))
  f == nothing ? ex : :($(esc(f)) = $ex)
end
@@ -1,26 +0,0 @@
using DataFlow: mux, interpret, interpv, ituple, ilambda, iconst, Context

function astuple(xs::Vertex)
  isconstant(xs) && isa(value(xs).value, Tuple) ? value(xs).value :
  isa(xs, Vertex) && value(xs) == tuple ? inputs(xs) :
  nothing
end

astuple(xs::Tuple) = xs

astuple(xs) = nothing

function astuples(xs)
  xs = [astuple(x) for x in xs]
  all(x->!(x==nothing), xs) ? xs : nothing
end

function imap(cb, ctx, ::typeof(map), f, xs...)
  f, xs = interpv(ctx, (f, xs))
  xs′ = astuples(xs)
  xs′ ≠ nothing ?
    group(map(f, xs′...)...) :
    cb(ctx, map, constant(f), xs...)
end

imap(f, args...) = f(args...)
@@ -1,134 +0,0 @@
export unroll, unroll1

type Offset
  name::Symbol
  n::Int
  default::Nullable{Param}
end

Offset(name, n) = Offset(name, n, nothing)

Base.:-(o::Offset) = Offset(o.name, -o.n, o.default)

function liftloops(ex)
  ex = DataFlow.normedges(ex)
  decls = Dict()
  ex = MacroTools.postwalk(ex) do ex
    @capture(ex, x_{n_}) || return ex
    haskey(decls, (x,n)) && return namify(decls[(x,n)])
    @gensym edge
    decls[(x,n)] = :($edge = $(Offset(x,n))($x))
    edge
  end
  prepend!(ex.args, collect(values(decls)))
  ex
end

function hasloops(model)
  g = graph(model)
  g == nothing && return false
  iscyclic(g) && return true
  result = false
  map(m -> hasloops(m) && (result = true), g)
  return result
end

function atomise(model)
  postwalk(graph(model)) do v
    hasloops(value(v)) || return v
    spliceinputs(atomise(value(v)), inputs(v)...)
  end
end

function collect_state(v::IVertex)
  state = typeof(v)[]
  offset = Int[]
  default = Param[]
  prewalk!(v) do v
    isa(value(v), Offset) || return v
    if (i = findfirst(state, v[1])) == 0
      push!(state, v[1])
      push!(offset, max(0, -value(v).n))
      push!(default, get(value(v).default))
    else
      offset[i] = max(offset[i], -value(v).n)
    end
    v
  end
  return state, offset, default
end

hiddeninput(n) = vertex(Split(n), inputnode(1))

function create_steps(v::IVertex, n; seq = true, stateful = true)
  [(stateful ? bumpinputs : copy)(seq ? spliceinputs(v, hiddeninput(i)) : v) for i = 1:n]
end

function getvar(n, step, steps, offset, default; stateful = true)
  if stateful && step < 1
    hiddeninput(sum(offset[1:n-1]) + 1 - step)
  elseif step ∉ 1:length(steps)
    constant(default[n])
  else
    steps[step][1,n]
  end
end

function stateout(steps, offset, default)
  outs = []
  defaults = []
  for i = 1:length(offset), j = 1:offset[i]
    push!(outs, getvar(i, length(steps)-j+1, steps, offset, default))
    push!(defaults, default[i])
  end
  group(outs...), defaults
end

function unrollgraph(v::IVertex, n; seq = true, stateful = true)
  state, offset, default = collect_state(v)
  v = group(group(state...), v)
  steps = create_steps(v, n, seq = seq, stateful = stateful)
  for i = 1:n
    vars = inputs(steps[i][1])
    postwalk!(steps[i]) do v
      isa(value(v), Offset) || return v
      varid = findfirst(vars,v[1])
      getvar(varid, value(v).n + i, steps, offset, default, stateful = stateful)
    end
  end
  out = group(map(x->x[2], steps)...)
  if stateful
    state, defaults = stateout(steps, offset, default)
    group(state,out), map(Flux.state, defaults)
  else
    out, []
  end
end

unrollgraph(m, n; kws...) = unrollgraph(atomise(m), n; kws...)

# TODO: perhaps split into SeqModel + StatefulModel
type Unrolled <: Model
  model
  graph::IVertex{Any}
  state::Vector{Any}
  stateful::Bool
  steps::Int
end

(m::Unrolled)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(u::Unrolled) = u.graph

function unroll(model, n; seq = true, stateful = true)
  graph, state = unrollgraph(model, n; seq = seq, stateful = stateful)
  seq || stateful ? Unrolled(model, graph, state, stateful, n) : Capacitor(graph)
end

function unroll1(model)
  graph, state = unrollgraph(model, 1; seq = false)
  graph = group(graph[1], map(x->x[1], inputs(graph)[2:end])...)
  Unrolled(model, graph, state, false, 1)
end

flip(model) = Capacitor(map(x -> isa(x, Offset) ? -x : x, atomise(model)))
@@ -1,48 +0,0 @@
using DataFlow: ilinev, iargs, applylines, Line

type Hint
  typ
end

DataFlow.tocall(h::Hint, x) = :($x::$(h.typ))

function gethint(v::IVertex)
  while isa(value(v), Union{Line,Frame}) v = v[1] end
  isa(value(v), Hint) && return value(v).typ
  return
end

ihint(f, ctx::Context, h::Hint, x) = vertex(h, x)
ihint(f, args...) = f(args...)

hintify(c::Constant) = hintify(state(c.value))
hintify(xs::AbstractArray) = vertex(Hint(size(xs)), constant(:_))

interpshape = mux(ilinev, ihint, iargs, ituple, hintify)

function hintify(f, xs...)
  sh = infer(f, map(gethint, xs)...)
  sh ≠ nothing ? vertex(Hint(sh), vertex(f, xs...)) :
  !any(x->x==nothing, xs) && graph(f) ≠ nothing ? interpret(Context(interpshape), graph(f), xs...) :
  vertex(f, xs...)
end

function shapesv(f, args...)
  (g = graph(f)) == nothing && return
  ins = [vertex(Hint(d), inputnode(i)) for (i,d) in enumerate(args)]
  interpret(Context(interpshape), g, ins...)
end

shapes(args...) = shapesv(args...) |> syntax |> applylines |> (x->prettify(x, lines=true))

# Inference primitives

infer(f, args...) = graph(f) == nothing ? nothing : gethint(shapesv(f, args...))

function infer(::typeof(*), a::NTuple{2}, b::NTuple{2})
  a[2] == b[1] || return nothing
  (a[1], b[2])
end

# TODO: make correct
infer(::typeof(+), a, b) = a
@@ -1,8 +0,0 @@
export mse, mse!

function mse!(Δ, pred, target)
  map!(-, Δ, pred, target)
  sumabs2(Δ)/2
end

mse(pred, target) = mse!(similar(pred), pred, target)
src/data.jl, 36 lines deleted
@@ -1,36 +0,0 @@
export onehot, onecold, chunk, partition, batches, sequences

"""
    onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false]

    onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.]

Produce a one-hot-encoded version of an item, given a list of possible values
for the item.
"""
onehot(T::Type, label, labels) = T[i == label for i in labels]
onehot(label, labels) = onehot(Int, label, labels)

"""
    onecold([0.0, 1.0, 0.0, ...],
            ['a', 'b', 'c', ...]) => 'b'

The inverse of `onehot`; takes an output prediction vector and a list of
possible values, and produces the appropriate value.
"""
onecold(pred, labels = 1:length(pred)) = labels[findfirst(pred, maximum(pred))]

using Iterators
import Iterators: partition

export partition

Base.length(l::Iterators.Partition) = length(l.xs) ÷ l.step

_partition(r::UnitRange, step::Integer) = (step*(i-1)+1:step*i for i in 1:(r.stop÷step))
_partition(xs, step) = (xs[i] for i in _partition(1:length(xs), step))

chunk(xs, n) = _partition(xs, length(xs)÷n)

batches(xs...) = (Batch(x) for x in zip(xs...))
sequences(xs, len) = (Seq(x) for x in partition(xs, len))
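
Editor's note, not part of the original file: a minimal sketch of how `chunk`, `sequences`, `onehot`, and `batches` compose, mirroring the character-RNN example earlier in this commit. `text`, `alphabet`, `nunroll`, and `nbatch` are assumed to be defined by the caller.

```julia
seqs(chars) = sequences((onehot(Float32, c, alphabet) for c in chars), nunroll)
Xs = batches((seqs(part) for part in chunk(text, nbatch))...)  # iterator of Batches of Seqs
```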
@@ -1,21 +0,0 @@
export Batch, batchone

immutable Batch{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Batch.data size, eltype, getindex, setindex!, rawbatch

Batch(xs) = Batch(CatMat(xs))

convert{T,S}(::Type{Batch{T,S}},storage::S) =
  Batch{T,S}(storage)

batchone(x) = Batch((x,))
batchone(x::Batch) = x

@render Juno.Inline b::Batch begin
  Tree(Row(Text("Batch of "), eltype(b),
           Juno.fade("[$(length(b))]")),
       Juno.trim(collect(b)))
end
@@ -1,50 +0,0 @@
import Base: eltype, size, getindex, setindex!, convert

export CatMat, rawbatch

immutable CatMat{T,S} <: AbstractVector{T}
  data::S
end

convert{T,S}(::Type{CatMat{T,S}},storage::S) =
  CatMat{T,S}(storage)

eltype{T}(::CatMat{T}) = T

size(b::CatMat) = (size(b.data, 1),)

getindex(b::CatMat, i)::eltype(b) = slicedim(b.data, 1, i)

setindex!(b::CatMat, v, i) = b[i, :] = v

allequal(xs) = all(x -> x == first(xs), xs)

function (::Type{CatMat{T,S}}){T,S}(xs, storage::S)
  @assert @>> xs map(size) allequal
  @assert size(storage) == (length(xs), size(first(xs))...)
  for i = 1:length(xs)
    storage[i, :] = xs[i]
  end
  return CatMat{T,S}(storage)
end

function (::Type{CatMat{T}}){T}(xs)
  xs′ = map(rawbatch, xs)
  storage = similar(first(xs′), (length(xs′), size(first(xs′))...))
  CatMat{T,typeof(storage)}(xs′, storage)
end

function CatMat(xs)
  xs = promote(xs...)
  CatMat{eltype(xs)}(xs)
end

@render Juno.Inline b::CatMat begin
  Tree(Row(Text("CatMat of "), eltype(b),
           Juno.fade("[$(length(b))]")),
       Juno.trim(collect(b)))
end

rawbatch(xs) = xs

rawbatch(xs::CatMat) = xs.data
@@ -1,20 +0,0 @@
export seq, Seq, BatchSeq

immutable Seq{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Seq.data size, eltype, getindex, setindex!, rawbatch

Seq(xs) = Seq(CatMat(xs))

convert{T,S}(::Type{Seq{T,S}},storage::S) =
  Seq{T,S}(storage)

@render Juno.Inline b::Seq begin
  Tree(Row(Text("Seq of "), eltype(b),
           Juno.fade("[$(length(b))]")),
       Juno.trim(collect(b)))
end

typealias BatchSeq{T<:Seq} Batch{T}
@@ -1,18 +0,0 @@
export σ, relu, softmax, flatten

σ(x) = 1 ./ (1 + exp.(-x))

back!(::typeof(σ), Δ, x) = Δ .* σ(x) .* (1 .- σ(x))

relu(x) = max(0, x)

back!(::typeof(relu), Δ, x) = Δ .* (x .> 0)

softmax(xs) = exp.(xs) ./ sum(exp.(xs))

flatten(xs) = reshape(xs, length(xs))

shape(::typeof(flatten), in) = prod(in)

infer(::typeof(softmax), x) = x
infer(::typeof(σ), x) = x
@@ -1,20 +0,0 @@
export Affine

# TODO: type hints for parameters

@net type Affine
  W
  b
  x -> x*W + b
end

Affine(in::Integer, out::Integer; init = initn) =
  Affine(init(in, out), init(1, out))

@net type Sigmoid
  layer::Model
  x -> σ(layer(x))
end

Sigmoid(in::Integer, out::Integer; init = randn) =
  Sigmoid(Affine(in, out, init = init))
@@ -1,32 +0,0 @@
export Chain

function inferchain(ms)
  chain = []
  sh = nothing
  for m in ms
    m = init(m, single(sh))
    sh = shape(m, sh)
    push!(chain, m)
  end
  return chain, sh
end

type Chain <: Model
  layers::Vector{Any}
  shape
  function Chain(ms...)
    ms, shape = inferchain(ms)
    return new(ms, shape)
  end
end

@forward Chain.layers Base.getindex, Base.first, Base.last

(s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers)
back!(s::Chain, Δ) = foldr((m, Δ) -> back!(m, Δ), Δ, s.layers)
update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers)

graph(s::Chain) =
  foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers)

shape(c::Chain, in) = c.shape
@@ -1,51 +0,0 @@
export Recurrent, GatedRecurrent, LSTM

@net type Recurrent
  Wxy; Wyy; by
  y
  function (x)
    y = tanh( x * Wxy + y{-1} * Wyy + by )
  end
end

Recurrent(in, out; init = initn) =
  Recurrent(init((in, out)), init((out, out)), init(out), init(out))

@net type GatedRecurrent
  Wxr; Wyr; br
  Wxu; Wyu; bu
  Wxh; Wyh; bh
  y
  function (x)
    reset  = σ( x * Wxr + y{-1} * Wyr + br )
    update = σ( x * Wxu + y{-1} * Wyu + bu )
    y′ = tanh( x * Wxh + (reset .* y{-1}) * Wyh + bh )
    y  = (1 .- update) .* y′ + update .* y{-1}
  end
end

GatedRecurrent(in, out; init = initn) =
  GatedRecurrent(vcat([[init((in, out)), init((out, out)), init(out)] for _ = 1:3]...)...,
                 zeros(Float32, out))

@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input  = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state  = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

LSTM(in, out; init = initn) =
  LSTM(vcat([[init((in, out)), init((out, out)), init((1, out))] for _ = 1:4]...)...,
       zeros(Float32, out), zeros(Float32, out))
@@ -1,47 +0,0 @@
export Input

typealias Dims{N} NTuple{N,Int}

dims(d::Dims) = d

dims(i...) = (i...,)

single(i) = i
single(i::Dims) = length(i) == 1 ? first(i) : i

# Shim for kicking off shape inference

type ShapeError <: Exception
  layer
  shape
end

type Input{N} <: Model
  dims::Dims{N}
end

Input(i...) = Input(dims(i...))

(::Input)(x) = x
back!(::Input, Δ, x) = Δ

# Initialise placeholder

type Init{F}
  f::F
end

init(i::Init, input...) = i.f(input...)
init(m, input...) = m

# Shape inference API

shape(x, in) = in

shape(i::Input, _) = i.dims

# Implementation for bundled layers

shape(d::Affine, _) = length(state(d.b)) # TODO: could perhaps infer this

Affine(out::Integer) = Init(in::Integer -> Affine(in, out))
@@ -1,44 +0,0 @@
export Conv2D, MaxPool, Reshape

type Conv2D <: Model
  filter::Param{Array{Float32,4}} # [height, width, inchans, outchans]
  stride::Dims{2}
end

Conv2D(size; in = 1, out = 1, stride = (1,1), init = initn) =
  Conv2D(param(initn(size..., in, out)), stride)

shape(c::Conv2D, in::Dims{2}) =
  (map(i -> (in[i]-size(c.filter,i))÷c.stride[i]+1, (1,2))..., size(c.filter, 4))

shape(c::Conv2D, in::Dims{3}) =
  shape(c, (in[1],in[2]))

type MaxPool <: Model
  size::Dims{2}
  stride::Dims{2}
end

MaxPool(size; stride = (1,1)) =
  MaxPool(size, stride)

shape(c::MaxPool, in::Dims{2}) =
  map(i -> (in[i]-c.size[i])÷c.stride[i]+1, (1,2))

shape(c::MaxPool, in::Dims{3}) =
  (shape(c, (in[1],in[2]))..., in[3])

shape(c::MaxPool, in) = throw(ShapeError(c, in))

immutable Reshape{N}
  dims::Dims{N}
end

Reshape(dims::Integer...) = Reshape(dims)

function shape(r::Reshape, dims)
  prod(dims) == prod(r.dims) || throw(ShapeError(r, dims))
  return r.dims
end

shape(r::Reshape, ::Void) = r.dims
src/model.jl, 111 lines deleted
@@ -1,111 +0,0 @@
export Model, back!, update!, param

# Basic model API

"""
    (m::Model)(X...) => Y

A "model" is a function with state. For example, a logistic regression is the
function

    x -> σ(x * W + b)

where `W` and `b` are a trainable matrix and vector of weights respectively. The
`Model` abstract type is used loosely; in general the concept of a model is
closer to a protocol, and models don't need to inherit from this type. Normal
Julia functions are models with 0 parameters, for example.
"""
abstract Model

"""
    back!(m::Model, ΔY, X...) => ΔX

Backpropagate the gradient `ΔY` through the model `m`, accumulating the
gradients of any parameters. Returns the gradient of the input `X`. Gradients
may be arrays or tuples of arrays (for multiple inputs/outputs).
"""
back!(m::Model, Δ, xs...) = error("Backprop not implemented for $(typeof(m))")

"""
    update!(m::Model, η) => m

Update the parameters of the model `m` using the accumulated gradients from
`back!`, using the learning rate `η`.
"""
update!(m, η) = m

"""
    graph(m::Model) => ::IVertex{Any} | nothing

Returns the graph representation of the model, if any. Most models are built
from lower-level components and can simply implement this method to get most of
Flux's functionality. If this method isn't available, functionality like
backpropagation or conversion for backend must be implemented on a case-by-case
basis. Alternatively, one can implement this method and override individual
methods as necessary.
"""
graph(m) = nothing

# Model parameters

"""
A `Param` object stores a parameter array along with an accumulated delta to
that array. When converting to backends like TensorFlow, identical `Param`s will
result in identical variable objects, making model reuse trivial.
"""
type Param{T}
  x::T
  Δx::T
end

"""
    param(x::T) => ::Param{T}

Convenience method for creating a `Param` object for a given array.
"""
param(x) = Param(x, zero(x))

state(p::Param) = p.x

"""
    accumulate!(p::Param, Δ) => p

Accumulates the update `Δ` on `p`. The value of `p` won't change until
`update!`.
"""
function accumulate!(p::Param, Δ)
  p.Δx += Δ
  return p
end

"""
    update!(p::Param)

Apply the accumulated updates to the value of the parameter.
"""
function update!(p::Param, η)
  p.x .-= p.Δx .* η
  p.Δx[:] = 0
  return p
end

state(x) = x
accumulate!(x, Δ) = x

@forward Param.x Base.size

function Base.show(io::IO, p::Param)
  print(io, "Param", size(p.x))
end

# Anonymous models

export Capacitor

type Capacitor <: Model
  graph::IVertex{Any}
end

(m::Capacitor)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(cap::Capacitor) = cap.graph
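
Editor's note, not part of the original file: a minimal sketch of the parameter life-cycle described by the docstrings above, using only the definitions in this file (`param`, `accumulate!`, `update!`, `state`).

```julia
W = param(randn(3, 3))      # wrap an array together with an accumulated-delta slot
accumulate!(W, ones(3, 3))  # gradients from back! land here; the value is unchanged
update!(W, 0.1)             # subtract the accumulated delta scaled by η, then zero it
state(W)                    # the underlying array
```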
src/utils.jl, 30 lines deleted
@@ -1,30 +0,0 @@
export AArray

const AArray = AbstractArray

initn(dims...) = randn(Float32, dims...)/10

function train!(m, train, test = []; epoch = 1, batch = 10, η = 0.1)
  i = 0
  Δ = zeros(length(train[1][2]))
  for _ in 1:epoch
    @progress for (x, y) in train
      i += 1
      pred = m(x)
      any(isnan, pred) && error("NaN")
      err = mse!(Δ, pred, y)
      back!(m, Δ, x)
      i % batch == 0 && update!(m, η)
      i % 1000 == 0 && @show accuracy(m, test)
    end
  end
  return m
end

function accuracy(m, data)
  correct = 0
  for (x, y) in data
    onecold(m(x)) == onecold(y) && (correct += 1)
  end
  return correct/length(data)
end
@@ -1,10 +0,0 @@
xs = randn(10)' # TODO: batching semantics

d = Affine(10, 20)

@test d(xs) == xs*d.W.x + d.b.x

let
  @capture(syntax(d), _Frame(_Line(x_[1] * W_ + b_)))
  @test isa(x, Input) && isa(W, Param) && isa(b, Param)
end
@@ -1,10 +0,0 @@
using Flux, DataFlow, MacroTools, Base.Test
using Flux: graph, Param
using DataFlow: Input, Line, Frame

syntax(v::Vertex) = prettify(DataFlow.syntax(v))
syntax(x) = syntax(graph(x))

include("basic.jl")
include("recurrent.jl")
include("backend.jl")