remove all

This commit is contained in:
parent 1010b18084
commit dcbc77980e
.gitignore (vendored): 5 lines removed
@@ -1,5 +0,0 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
docs/build/
docs/site/
.travis.yml: 17 lines removed
@@ -1,17 +0,0 @@
# Documentation: http://docs.travis-ci.com/user/languages/julia/
language: julia
os:
  - linux
  - osx
julia:
  - 0.5
notifications:
  email: false
# uncomment the following lines to override the default test script
script:
  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
  - julia -e 'Pkg.clone("https://github.com/MikeInnes/DataFlow.jl")'
  - julia -e 'Pkg.clone(pwd()); Pkg.build("Flux"); Pkg.test("Flux"; coverage=true)'
# after_success:
  - julia -e 'Pkg.add("Documenter")'
  - julia -e 'cd(Pkg.dir("Flux")); include(joinpath("docs", "make.jl"))'
LICENSE.md: 22 lines removed
@@ -1,22 +0,0 @@
The Flux.jl package is licensed under the MIT "Expat" License:

> Copyright (c) 2016: Mike Innes.
>
> Permission is hereby granted, free of charge, to any person obtaining
> a copy of this software and associated documentation files (the
> "Software"), to deal in the Software without restriction, including
> without limitation the rights to use, copy, modify, merge, publish,
> distribute, sublicense, and/or sell copies of the Software, and to
> permit persons to whom the Software is furnished to do so, subject to
> the following conditions:
>
> The above copyright notice and this permission notice shall be
> included in all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md: 50 lines removed
@@ -1,50 +0,0 @@
# Флукс

[](https://travis-ci.org/MikeInnes/Flux.jl)

Flux is a high-level API for machine learning, implemented in Julia.

Flux aims to provide a concise and expressive syntax for architectures that are hard to express within other frameworks. The notation should be familiar and extremely close to what you'd find in a paper or description of the model.

The current focus is on ANNs with TensorFlow or MXNet as a backend. While it's in a very early working-prototype stage, you can see what works so far in the [examples folder](/examples).

## Brief Examples

Simple multi-layer-perceptron for MNIST:

```julia
Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)
```

LSTM example:

```julia
@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)
```
docs/make.jl: 17 lines removed
@@ -1,17 +0,0 @@
using Documenter, Flux

makedocs(modules=Module[Flux],
         doctest=false, clean=true,
         format = :html,
         sitename="Flux Documentation",
         pages = [
           "Home" => "index.md",
         ])

deploydocs(
  repo = "github.com/MikeInnes/Flux.jl.git",
  target = "build",
  osname = "linux",
  julia = "0.5",
  deps = nothing,
  make = nothing)
@@ -1 +0,0 @@
# Flux
@@ -1,22 +0,0 @@
using Flux, MNIST

data = [(Vector{Float32}(trainfeatures(i)), onehot(Float32, trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]

m = Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

# Convert to TensorFlow
model = tf(m)

# An example prediction pre-training
model(data[1][1])

@time Flux.train!(model, train, test, η = 1e-3)

# An example prediction post-training
model(data[1][1])
@@ -1,26 +0,0 @@
using Flux

# Traditional Approach

# 100 samples of sequences of 15 28×28 3-colour images
rand(100, 15, 28, 28, 3)

# Basic Batching

data = Batch([collect(reshape(9(i-1):9i-1, 3, 3)) for i = 1:10])

Batch(flatten.(data))

data |> structure

Batch(flatten.(data)) |> structure

# Nested Batching

# DNA sequence, encoded as a list of [A, T, G, C]
x1 = Seq([[0,1,0,0], [1,0,0,0], [0,0,0,1]])
x2 = Seq([[0,0,1,0], [0,0,0,1], [0,0,1,0]])

data = Batch([x1, x2])

data |> structure
@@ -1,38 +0,0 @@
using Flux
import StatsBase: wsample

nunroll = 50
nbatch = 50

getseqs(chars, alphabet) = sequences((onehot(Float32, char, alphabet) for char in chars), nunroll)
getbatches(chars, alphabet) = batches((getseqs(part, alphabet) for part in chunk(chars, nbatch))...)

input = readstring("$(homedir())/Downloads/shakespeare_input.txt")
alphabet = unique(input)
N = length(alphabet)

Xs, Ys = getbatches(input, alphabet), getbatches(input[2:end], alphabet)

model = Chain(
  Input(N),
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)

m = tf(unroll(model, nunroll))

@time Flux.train!(m, Xs, Ys, η = 0.1, epoch = 1)

string(map(c -> onecold(c, alphabet), m(first(first(Xs))))...)

function sample(model, n, temp = 1)
  s = [rand(alphabet)]
  m = tf(unroll(model, 1))
  for i = 1:n
    push!(s, wsample(alphabet, softmax(m(Seq((onehot(Float32, s[end], alphabet),)))[1]./temp)))
  end
  return string(s...)
end

sample(model, 100)
@@ -1,70 +0,0 @@
using Flux, Juno

# Flux aims to provide high-level APIs that work well across backends, but in
# some cases you may want to take advantage of features specific to a given
# backend (or Flux may simply not have an implementation of that feature yet).
# In these cases it's easy to "drop down" and use the backend's API directly,
# where appropriate.

# In this example, both things are happening; firstly, Flux doesn't yet support
# ConvNets in the pure-Julia backend, but this is invisible thanks to the use of
# a simple "shim" type, `Conv2D`. This is provided by the library but could easily
# have been user-defined.

# Secondly, we want to take advantage of TensorFlow.jl's training process and
# optimisers. We can simply call `Tensor` exactly as we would on a regular
# TensorFlow model, and the rest of the process trivially follows
# TensorFlow.jl's usual API.

conv1 = Chain(
  Reshape(28,28,1),
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Input(12,12,20),
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = Chain(
  conv1, conv2, flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain TensorFlow, following
# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl
# (taking only the training and cost logic, not the graph building steps)

using TensorFlow, Distributions

include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl"))
loader = DataLoader()

session = Session(Graph())

x = placeholder(Float32)
y′ = placeholder(Float32)
y = Tensor(lenet, x)

cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2]))

train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy)

accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32))

run(session, initialize_all_variables())

@progress for i in 1:1000
  batch = next_batch(loader, 50)
  if i%100 == 1
    train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2]))
    info("step $i, training accuracy $train_accuracy")
  end
  run(session, train_step, Dict(x=>batch[1], y′=>batch[2]))
end

testx, testy = load_test_set()
test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy))
info("test accuracy $test_accuracy")
@@ -1,52 +0,0 @@
# Based on https://arxiv.org/abs/1409.0473

using Flux
using Flux: flip

# A recurrent model which takes a token and returns a context-dependent
# annotation.

@net type Encoder
  forward
  backward
  token -> hcat(forward(token), backward(token))
end

Encoder(in::Integer, out::Integer) =
  Encoder(LSTM(in, out÷2), flip(LSTM(in, out÷2)))

# A recurrent model which takes a sequence of annotations, attends, and returns
# a predicted output token.

@net type Decoder
  attend
  recur
  state; y; N
  function (anns)
    energies = map(ann -> exp(attend(hcat(state{-1}, ann))[1]), seq(anns, N))
    weights = energies./sum(energies)
    ctx = sum(map((α, ann) -> α .* ann, weights, anns))
    (_, state), y = recur((state{-1},y{-1}), ctx)
    y
  end
end

Decoder(in::Integer, out::Integer; N = 1) =
  Decoder(Affine(in+out, 1),
          unroll1(LSTM(in, out)),
          param(zeros(1, out)), param(zeros(1, out)), N)

# The model

Nalpha = 5 # The size of the input token vector
Nphrase = 7 # The length of (padded) phrases
Nhidden = 12 # The size of the hidden state

encode = Encoder(Nalpha, Nhidden)
decode = Chain(Decoder(Nhidden, Nhidden, N = Nphrase), Affine(Nhidden, Nalpha), softmax)

model = Chain(
  unroll(encode, Nphrase, stateful = false),
  unroll(decode, Nphrase, stateful = false, seq = false))

xs = Batch([Seq(rand(Float32, Nalpha) for _ = 1:Nphrase)])
src/Flux.jl: 36 lines removed
@@ -1,36 +0,0 @@
module Flux

using MacroTools, Lazy, DataFlow, Juno
using DataFlow: graphm, syntax, prewalk!, postwalk!, prewalk, postwalk,
  iscyclic, Constant, constant, isconstant, group, Split, splitnode,
  detuple, value, inputs, thread!, value, inputs, Split, splitnode, inputnode,
  spliceinputs, bumpinputs, Frame
using Juno: Tree, Row

# Zero Flux Given

include("model.jl")
include("utils.jl")
include("data.jl")

include("compiler/code.jl")
include("compiler/loops.jl")
include("compiler/interp.jl")
include("compiler/shape.jl")

include("layers/affine.jl")
include("layers/activation.jl")
include("layers/recurrent.jl")
include("layers/shape.jl")
include("layers/chain.jl")
include("layers/shims.jl")

include("dims/catmat.jl")
include("dims/batching.jl")
include("dims/seq.jl")

include("cost.jl")

include("backend/backend.jl")

end # module
@@ -1,11 +0,0 @@
export tf

function loadtf()
  isdefined(Flux, :TF) && return
  @eval include(joinpath(dirname($@__FILE__), "tensorflow/tensorflow.jl"))
end

function tf(args...)
  loadtf()
  TF.tf(args...)
end
@@ -1,72 +0,0 @@
using Base: @get!
using DataFlow: Constant, constant, Context, interpret, Split,
  interpv, ituple, ilambda, iconst, iline, stack, mux
using Flux: imap
using TensorFlow: RawTensor

# TODO: implement Julia's type promotion rules

node(x::Tuple) = map(node, x)
node(x::Tensor) = x
node(x::Variable) = x
node(x::Number) = TensorFlow.constant(Float32(x))

graph(::typeof(tuple), args...) = (args...,)
graph(s::Split, t::Tuple) = t[s.n]
graph(::typeof(softmax), x) = nn.softmax(x)
graph(::typeof(relu), x) = nn.relu(x)
graph(::typeof(σ), x) = nn.sigmoid(x)
graph(::typeof(hcat), xs...) = concat(1, xs)
graph(::typeof(seq), xs, n) = TensorFlow.unpack(xs, num = n, axis = 1)

for op in (tanh, *, .*, +, -)
  @eval graph(::typeof($op), args...) = $op(node(args)...)
end

graph(::typeof(.-), args...) = -(node(args)...)

# reshape hack due to https://github.com/malmaud/TensorFlow.jl/issues/79
batchsize(x::Tensor) = reduce_sum(slice(TensorFlow.shape(x), [0], [1]))
graph(::typeof(flatten), x) = reshape(x, pack([batchsize(x), Int32(-1)]))
graph(r::Reshape, x) = reshape(x, pack([batchsize(x), map(Int32, r.dims)...]))

graph(::Input, x) = x

graph(p::MaxPool, x) =
  nn.max_pool(x, [1, p.size..., 1], [1, p.stride..., 1], "VALID")

graph(op::Op, xs...) = op.f(xs...)

function graph(ctx::Context, model, args...)
  node = graph(model, interpv(ctx, args)...)
  isa(node, Tensor) && (ctx[:stacks][node.op.name] = stack(ctx))
  return node
end

interp(ctx, c::Conv2D, x) =
  nn.conv2d(interpv(ctx, x), interp(ctx, Constant(c.filter)), [1,c.stride...,1], "VALID")

interp{T<:AArray}(ctx, p::Constant{Flux.Param{T}}) =
  haskey(ctx[:params], p.value) ?
    ctx[:params][p.value] :
    (ctx[:params][p.value] = Variable(p.value.x))

interp(ctx, p::Constant) = p.value

function interp(ctx, model, args...)
  g = Flux.graph(model)
  g == nothing && return graph(ctx, model, args...)
  DataFlow.iscyclic(g) && error("This model has a cycle; try unrolling it first.")
  interpret(ctx, g, interpv(ctx, args)...)
end

function tograph(model, args...)
  ctx = Context(mux(iline, ilambda, ituple, imap, interp),
                params = ObjectIdDict(), stacks = Dict())
  out = interp(ctx, model, map(constant, args)...)
  return ctx[:params], ctx[:stacks], out
end

TensorFlow.Tensor(m::Flux.Model, args...) = tograph(m, args...)[2]

RawTensor(data::Union{Batch,Seq}) = RawTensor(rawbatch(data))
@@ -1,98 +0,0 @@
type Model
  model::Any
  session::Session
  params::Dict{Flux.Param,Tensor}
  stacks::Dict
  inputs::Vector{Tensor}
  output::Any
end

function makesession(model, inputs; session = Session(Graph()))
  params, stacks, output = tograph(model, inputs...)
  run(session, initialize_all_variables())
  Model(model, session, params, stacks, inputs, output)
end

function makesession(model, n::Integer; session = Session(Graph()))
  makesession(model, [placeholder(Float32) for _ = 1:n], session = session)
end

tf(model) = makesession(model, 1)

function storeparams!(sess, params)
  for (p, t) in params
    p.x = run(sess, t)
  end
end

storeparams!(m::Model) = storeparams!(m.session, m.params)

ismultioutput(m::Model) = !isa(m.output, Tensor)

function batch(xs)
  dims = ndims(xs)-1
  T = Array{eltype(xs),dims}
  B = Array{eltype(xs),dims+1}
  Batch{T,B}(xs)
end

function tferr(model::Model, e)
  m = match(r"Node: ([\w\d]+) =", string(e.status))
  m == nothing && return
  node = m.captures[1]
  if haskey(model.stacks, node)
    stk = model.stacks[node]
    println("TensorFlow error occurred at:")
    foreach(l -> println("$(l.file):$(l.line)"), stk)
  end
end

function runmodel(m::Model, args...)
  @assert length(args) == length(m.inputs)
  try
    output = run(m.session, m.output, Dict(zip(m.inputs, args)))
    ismultioutput(m) ? (batch.(output)...,) : batch(output)
  catch e
    isa(e, TensorFlow.TFException) || rethrow(e)
    tferr(m, e)
    rethrow(e)
  end
end

function (m::Model)(args::Batch...)
  runmodel(m, args...)
end

function (m::Model)(args...)
  output = m(map(batchone, args)...)
  ismultioutput(m) ? map(first, output) : first(output)
end

for f in :[back!, update!].args
  @eval function Flux.$f(m::Model, args...)
    error($(string(f)) * " is not yet supported on TensorFlow models")
  end
end

import Juno: info

function Flux.train!(m::Model, train, test=[]; epoch = 1, η = 0.1,
                     loss = (y, y′) -> reduce_sum((y - y′).^2)/2,
                     opt = TensorFlow.train.GradientDescentOptimizer(η))
  i = 0
  Y = placeholder(Float32)
  Loss = loss(m.output, Y)
  minimize_op = TensorFlow.train.minimize(opt, Loss)
  for e in 1:epoch
    info("Epoch $e\n")
    @progress for (x, y) in train
      y, cur_loss, _ = run(m.session, vcat(m.output, Loss, minimize_op),
                           Dict(m.inputs[1]=>batchone(x), Y=>batchone(y)))
      if i % 5000 == 0
        @show y
        @show accuracy(m, test)
      end
      i += 1
    end
  end
end
@@ -1,83 +0,0 @@
# TODO: refactor, some of this is more general than just the TF backend

type SeqModel
  m::Model
  state::Any
end

cgroup(xs...) = DataFlow.group(map(constant, xs)...)

function makesession(model::Flux.Unrolled)
  sess = Session(Graph())
  input = placeholder(Float32)
  inputs = TensorFlow.unpack(input, num = model.steps, axis = 1)
  let params, stacks, outputs, instates, outstates
    if model.stateful
      instates = [placeholder(Float32) for _ in model.state]
      params, stacks, (outstates, outputs) = tograph(model, cgroup(instates...), cgroup(inputs...))
    else
      params, stacks, outputs = tograph(model, cgroup(inputs...))
    end
    output = TensorFlow.pack(outputs, axis = 1)
    run(sess, initialize_all_variables())
    sess, params, stacks, (instates, input), (outstates, output)
  end
end

function tf(model::Flux.Unrolled)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(model)
  SeqModel(
    Model(model, sess, params, stacks,
          [instates..., input], [outstates..., output]),
    model.state)
end

function batchseq(xs)
  dims = ndims(xs)-2
  T = Array{eltype(xs),dims}
  S = Array{eltype(xs),dims+1}
  B = Array{eltype(xs),dims+2}
  Batch{Seq{T,S},B}(xs)
end

batchseq(xs::Batch) = batchseq(rawbatch(xs))

TensorFlow.get_tensors(x::Tuple) = TensorFlow.get_tensors(collect(x))

function (m::SeqModel)(x::BatchSeq)
  m.m.model.stateful || return batchseq(runmodel(m.m, x)[end])
  if isempty(m.state) || length(first(m.state)) ≠ length(x)
    m.state = batchone.(m.m.model.state)
  end
  output = runmodel(m.m, m.state..., x)
  m.state, output = output[1:end-1], output[end]
  return batchseq(output)
end

(m::SeqModel)(x::Seq) = first(m(batchone(x)))

function Flux.train!(m::SeqModel, Xs, Ys; epoch = 1, η = 0.1,
                     loss = (y, ŷ) -> -reduce_sum(y .* log(ŷ)),
                     opt = () -> TensorFlow.train.GradientDescentOptimizer(η))
  batchlen, seqlen = length(first(Xs)), length(first(Xs)[1])
  state = batchone.(m.m.model.state)
  sess, params, stacks, (instates, input), (outstates, output) = makesession(m.m.model)
  Y = placeholder(Float32)
  Loss = loss(Y, output)/batchlen/seqlen
  minimize_op = TensorFlow.train.minimize(opt(), Loss)
  @progress "training" for e in 1:epoch
    info("Epoch $e\n")
    @progress "epoch" for (i, (x, y)) in enumerate(zip(Xs,Ys))
      out = run(sess, vcat(outstates..., output, Loss, minimize_op),
                merge(Dict(input=>batchone(x), Y=>batchone(y)),
                      Dict(zip(instates, state))))
      state = out[1:length(state)]
      loss = out[end-1]
      isnan(loss) && error("Loss is NaN")
      isinf(loss) && error("Loss is Inf")
      (i-1) % 10 == 0 && @show loss
    end
  end
  storeparams!(sess, params)
  return
end
@@ -1,21 +0,0 @@
module TF

using ..Flux, DataFlow, TensorFlow, Juno
import Flux: accuracy

export tf

type Op
  f
  shape
end

Op(f) = Op(f, (d...) -> nothing)

Flux.shape(op::Op, d...) = op.shape(d...)

include("graph.jl")
include("model.jl")
include("recurrent.jl")

end
@@ -1,81 +0,0 @@
import DataFlow: mapconst, cse

export @net, @ml

function process_func(ex, params = [])
  @capture(shortdef(ex), (args__,) -> body_)
  body = @> body MacroTools.flatten liftloops graphm DataFlow.il
  body = mapconst(x -> x in params ? :(self.$x) : x, body)
  return args, body
end

function makegraph(graph, args)
  @assert length(args) == 1
  graph = prewalk(graph) do v
    isa(value(v), Constant) && value(v).value == args[1] ?
      inputnode(1) :
      v
  end
  graph = map(graph) do x
    isa(x, Offset) ?
      :(Flux.Offset($(Expr(:quote, x.name)), $(x.n), self.$(x.name))) :
      x
  end
  vertex(:(Flux.Frame(self)), graph)
end

function build_type(T, params)
  @esc T
  ex = quote
    type $T <: Model
      $(params...)
    end
  end
  if any(x->isexpr(x, Symbol), params)
    push!(ex.args,
      :($T($(map(x->isexpr(x, Symbol) ? :($x::AArray) : x, params)...)) =
        $T($(map(x->isexpr(x, Symbol) ? :(param($x)) : namify(x), params)...))))
  end
  ex
end

import Lazy: groupby

reifyparams(v::IVertex) = mapconst(x -> isa(x, Param) ? x.x : x, v)

function process_type(ex)
  @capture(ex, type T_ fs__ end)
  @destruct [params = false || [],
             funcs = true || []] = groupby(x->isexpr(x, :->, :function), fs)
  @assert length(funcs) == 1
  pnames = namify.(params)
  args, body = process_func(funcs[1], pnames)
  @assert length(args) == 1
  self = esc(:self)
  quote
    $(build_type(T, params))
    $(esc(:(self::$T)))($(args...),) = interpret(reifyparams(graph($self)), $(args...))
    $(esc(:(Flux.update!(self::$T, η)))) = ($(map(p -> :(update!($self.$p, η)), pnames)...);)
    $(esc(:(Flux.graph(self::$T)))) = $(DataFlow.constructor(mapconst(esc, makegraph(body, args))))
    nothing
  end
end

macro net(ex)
  isexpr(ex, :type) ? process_type(ex) :
  isexpr(ex, :->, :function) ? error("@net functions not implemented") :
  error("Unsupported model expression $ex")
end

function process_anon(ex)
  args, body = process_func(ex)
  @assert length(args) == 1
  :(Flux.Capacitor($(DataFlow.constructor(mapconst(esc, makegraph(body, args))))))
end

macro ml(ex)
  @capture(shortdef(ex), ((xs__,) -> body_ ) | (f_(xs__,) = body_)) ||
    error("@ml requires a function definition")
  ex = process_anon(:($(xs...,) -> $body))
  f == nothing ? ex : :($(esc(f)) = $ex)
end
@@ -1,26 +0,0 @@
using DataFlow: mux, interpret, interpv, ituple, ilambda, iconst, Context

function astuple(xs::Vertex)
  isconstant(xs) && isa(value(xs).value, Tuple) ? value(xs).value :
  isa(xs, Vertex) && value(xs) == tuple ? inputs(xs) :
  nothing
end

astuple(xs::Tuple) = xs

astuple(xs) = nothing

function astuples(xs)
  xs = [astuple(x) for x in xs]
  all(x->!(x==nothing), xs) ? xs : nothing
end

function imap(cb, ctx, ::typeof(map), f, xs...)
  f, xs = interpv(ctx, (f, xs))
  xs′ = astuples(xs)
  xs′ ≠ nothing ?
    group(map(f, xs′...)...) :
    cb(ctx, map, constant(f), xs...)
end

imap(f, args...) = f(args...)
@@ -1,134 +0,0 @@
export unroll, unroll1

type Offset
  name::Symbol
  n::Int
  default::Nullable{Param}
end

Offset(name, n) = Offset(name, n, nothing)

Base.:-(o::Offset) = Offset(o.name, -o.n, o.default)

function liftloops(ex)
  ex = DataFlow.normedges(ex)
  decls = Dict()
  ex = MacroTools.postwalk(ex) do ex
    @capture(ex, x_{n_}) || return ex
    haskey(decls, (x,n)) && return namify(decls[(x,n)])
    @gensym edge
    decls[(x,n)] = :($edge = $(Offset(x,n))($x))
    edge
  end
  prepend!(ex.args, collect(values(decls)))
  ex
end

function hasloops(model)
  g = graph(model)
  g == nothing && return false
  iscyclic(g) && return true
  result = false
  map(m -> hasloops(m) && (result = true), g)
  return result
end

function atomise(model)
  postwalk(graph(model)) do v
    hasloops(value(v)) || return v
    spliceinputs(atomise(value(v)), inputs(v)...)
  end
end

function collect_state(v::IVertex)
  state = typeof(v)[]
  offset = Int[]
  default = Param[]
  prewalk!(v) do v
    isa(value(v), Offset) || return v
    if (i = findfirst(state, v[1])) == 0
      push!(state, v[1])
      push!(offset, max(0, -value(v).n))
      push!(default, get(value(v).default))
    else
      offset[i] = max(offset[i], -value(v).n)
    end
    v
  end
  return state, offset, default
end

hiddeninput(n) = vertex(Split(n), inputnode(1))

function create_steps(v::IVertex, n; seq = true, stateful = true)
  [(stateful ? bumpinputs : copy)(seq ? spliceinputs(v, hiddeninput(i)) : v) for i = 1:n]
end

function getvar(n, step, steps, offset, default; stateful = true)
  if stateful && step < 1
    hiddeninput(sum(offset[1:n-1]) + 1 - step)
  elseif step ∉ 1:length(steps)
    constant(default[n])
  else
    steps[step][1,n]
  end
end

function stateout(steps, offset, default)
  outs = []
  defaults = []
  for i = 1:length(offset), j = 1:offset[i]
    push!(outs, getvar(i, length(steps)-j+1, steps, offset, default))
    push!(defaults, default[i])
  end
  group(outs...), defaults
end

function unrollgraph(v::IVertex, n; seq = true, stateful = true)
  state, offset, default = collect_state(v)
  v = group(group(state...), v)
  steps = create_steps(v, n, seq = seq, stateful = stateful)
  for i = 1:n
    vars = inputs(steps[i][1])
    postwalk!(steps[i]) do v
      isa(value(v), Offset) || return v
      varid = findfirst(vars,v[1])
      getvar(varid, value(v).n + i, steps, offset, default, stateful = stateful)
    end
  end
  out = group(map(x->x[2], steps)...)
  if stateful
    state, defaults = stateout(steps, offset, default)
    group(state,out), map(Flux.state, defaults)
  else
    out, []
  end
end

unrollgraph(m, n; kws...) = unrollgraph(atomise(m), n; kws...)

# TODO: perhaps split into SeqModel + StatefulModel
type Unrolled <: Model
  model
  graph::IVertex{Any}
  state::Vector{Any}
  stateful::Bool
  steps::Int
end

(m::Unrolled)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(u::Unrolled) = u.graph

function unroll(model, n; seq = true, stateful = true)
  graph, state = unrollgraph(model, n; seq = seq, stateful = stateful)
  seq || stateful ? Unrolled(model, graph, state, stateful, n) : Capacitor(graph)
end

function unroll1(model)
  graph, state = unrollgraph(model, 1; seq = false)
  graph = group(graph[1], map(x->x[1], inputs(graph)[2:end])...)
  Unrolled(model, graph, state, false, 1)
end

flip(model) = Capacitor(map(x -> isa(x, Offset) ? -x : x, atomise(model)))
@@ -1,48 +0,0 @@
using DataFlow: ilinev, iargs, applylines, Line

type Hint
  typ
end

DataFlow.tocall(h::Hint, x) = :($x::$(h.typ))

function gethint(v::IVertex)
  while isa(value(v), Union{Line,Frame}) v = v[1] end
  isa(value(v), Hint) && return value(v).typ
  return
end

ihint(f, ctx::Context, h::Hint, x) = vertex(h, x)
ihint(f, args...) = f(args...)

hintify(c::Constant) = hintify(state(c.value))
hintify(xs::AbstractArray) = vertex(Hint(size(xs)), constant(:_))

interpshape = mux(ilinev, ihint, iargs, ituple, hintify)

function hintify(f, xs...)
  sh = infer(f, map(gethint, xs)...)
  sh ≠ nothing ? vertex(Hint(sh), vertex(f, xs...)) :
    !any(x->x==nothing, xs) && graph(f) ≠ nothing ? interpret(Context(interpshape), graph(f), xs...) :
    vertex(f, xs...)
end

function shapesv(f, args...)
  (g = graph(f)) == nothing && return
  ins = [vertex(Hint(d), inputnode(i)) for (i,d) in enumerate(args)]
  interpret(Context(interpshape), g, ins...)
end

shapes(args...) = shapesv(args...) |> syntax |> applylines |> (x->prettify(x, lines=true))

# Inference primitives

infer(f, args...) = graph(f) == nothing ? nothing : gethint(shapesv(f, args...))

function infer(::typeof(*), a::NTuple{2}, b::NTuple{2})
  a[2] == b[1] || return nothing
  (a[1], b[2])
end

# TODO: make correct
infer(::typeof(+), a, b) = a
@@ -1,8 +0,0 @@
export mse, mse!

function mse!(Δ, pred, target)
  map!(-, Δ, pred, target)
  sumabs2(Δ)/2
end

mse(pred, target) = mse!(similar(pred), pred, target)
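For reference, a minimal usage sketch of the cost helpers above (illustrative only, not part of the removed file): the caller supplies a preallocated buffer `Δ`, which `mse!` fills with `pred - target` before returning half the summed squared error; `train!` in src/utils.jl reuses the same buffer across iterations.

```julia
# Hedged usage sketch for the removed cost helpers (Julia 0.5-era code).
pred   = [0.25, 0.70, 0.05]   # model output
target = [0.0, 1.0, 0.0]      # one-hot label
Δ = zeros(length(target))     # reusable gradient buffer

loss = mse!(Δ, pred, target)  # Δ now holds pred .- target; loss == sumabs2(Δ)/2
```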
src/data.jl: 36 lines removed
@@ -1,36 +0,0 @@
export onehot, onecold, chunk, partition, batches, sequences

"""
    onehot('b', ['a', 'b', 'c', 'd']) => [false, true, false, false]

    onehot(Float32, 'c', ['a', 'b', 'c', 'd']) => [0., 0., 1., 0.]

Produce a one-hot-encoded version of an item, given a list of possible values
for the item.
"""
onehot(T::Type, label, labels) = T[i == label for i in labels]
onehot(label, labels) = onehot(Int, label, labels)

"""
    onecold([0.0, 1.0, 0.0, ...],
            ['a', 'b', 'c', ...]) => 'b'

The inverse of `onehot`; takes an output prediction vector and a list of
possible values, and produces the appropriate value.
"""
onecold(pred, labels = 1:length(pred)) = labels[findfirst(pred, maximum(pred))]

using Iterators
import Iterators: partition

export partition

Base.length(l::Iterators.Partition) = length(l.xs) ÷ l.step

_partition(r::UnitRange, step::Integer) = (step*(i-1)+1:step*i for i in 1:(r.stop÷step))
_partition(xs, step) = (xs[i] for i in _partition(1:length(xs), step))

chunk(xs, n) = _partition(xs, length(xs)÷n)

batches(xs...) = (Batch(x) for x in zip(xs...))
sequences(xs, len) = (Seq(x) for x in partition(xs, len))
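The encoding helpers above are easiest to read with concrete values. The following examples are an illustrative sketch based on the docstrings and definitions in this file, not part of the commit:

```julia
# Hedged examples of the removed data utilities (Julia 0.5-era code).
onehot('b', ['a', 'b', 'c', 'd'])          # => [0, 1, 0, 0] (Int by default)
onehot(Float32, 'c', ['a', 'b', 'c', 'd']) # => Float32[0, 0, 1, 0]

onecold([0.1, 0.8, 0.1], ['a', 'b', 'c'])  # => 'b', the label at the maximum

collect(chunk(1:10, 2))                    # => [1:5, 6:10], two chunks of five
```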
@@ -1,21 +0,0 @@
export Batch, batchone

immutable Batch{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Batch.data size, eltype, getindex, setindex!, rawbatch

Batch(xs) = Batch(CatMat(xs))

convert{T,S}(::Type{Batch{T,S}},storage::S) =
  Batch{T,S}(storage)

batchone(x) = Batch((x,))
batchone(x::Batch) = x

@render Juno.Inline b::Batch begin
  Tree(Row(Text("Batch of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end
@@ -1,50 +0,0 @@
import Base: eltype, size, getindex, setindex!, convert

export CatMat, rawbatch

immutable CatMat{T,S} <: AbstractVector{T}
  data::S
end

convert{T,S}(::Type{CatMat{T,S}},storage::S) =
  CatMat{T,S}(storage)

eltype{T}(::CatMat{T}) = T

size(b::CatMat) = (size(b.data, 1),)

getindex(b::CatMat, i)::eltype(b) = slicedim(b.data, 1, i)

setindex!(b::CatMat, v, i) = b[i, :] = v

allequal(xs) = all(x -> x == first(xs), xs)

function (::Type{CatMat{T,S}}){T,S}(xs, storage::S)
  @assert @>> xs map(size) allequal
  @assert size(storage) == (length(xs), size(first(xs))...)
  for i = 1:length(xs)
    storage[i, :] = xs[i]
  end
  return CatMat{T,S}(storage)
end

function (::Type{CatMat{T}}){T}(xs)
  xs′ = map(rawbatch, xs)
  storage = similar(first(xs′), (length(xs′), size(first(xs′))...))
  CatMat{T,typeof(storage)}(xs′, storage)
end

function CatMat(xs)
  xs = promote(xs...)
  CatMat{eltype(xs)}(xs)
end

@render Juno.Inline b::CatMat begin
  Tree(Row(Text("CatMat of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end

rawbatch(xs) = xs

rawbatch(xs::CatMat) = xs.data
@@ -1,20 +0,0 @@
export seq, Seq, BatchSeq

immutable Seq{T,S} <: AbstractVector{T}
  data::CatMat{T,S}
end

@forward Seq.data size, eltype, getindex, setindex!, rawbatch

Seq(xs) = Seq(CatMat(xs))

convert{T,S}(::Type{Seq{T,S}},storage::S) =
  Seq{T,S}(storage)

@render Juno.Inline b::Seq begin
  Tree(Row(Text("Seq of "), eltype(b),
           Juno.fade("[$(length(b))]")),
    Juno.trim(collect(b)))
end

typealias BatchSeq{T<:Seq} Batch{T}
@@ -1,18 +0,0 @@
export σ, relu, softmax, flatten

σ(x) = 1 ./ (1 + exp.(-x))

back!(::typeof(σ), Δ, x) = Δ .* σ(x) .* (1 .- σ(x))

relu(x) = max(0, x)

back!(::typeof(relu), Δ, x) = Δ .* (x .> 0)

softmax(xs) = exp.(xs) ./ sum(exp.(xs))

flatten(xs) = reshape(xs, length(xs))

shape(::typeof(flatten), in) = prod(in)

infer(::typeof(softmax), x) = x
infer(::typeof(σ), x) = x
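As a quick numeric check of the definitions above (an illustrative sketch, not part of the removed file):

```julia
# Hedged numeric examples of the removed activations.
σ(0.0)                         # => 0.5
relu(-2.0), relu(3.0)          # => (0.0, 3.0)
sum(softmax([1.0, 2.0, 3.0]))  # => 1.0, softmax outputs are normalised
```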
@@ -1,20 +0,0 @@
export Affine

# TODO: type hints for parameters

@net type Affine
  W
  b
  x -> x*W + b
end

Affine(in::Integer, out::Integer; init = initn) =
  Affine(init(in, out), init(1, out))

@net type Sigmoid
  layer::Model
  x -> σ(layer(x))
end

Sigmoid(in::Integer, out::Integer; init = randn) =
  Sigmoid(Affine(in, out, init = init))
@@ -1,32 +0,0 @@
export Chain

function inferchain(ms)
  chain = []
  sh = nothing
  for m in ms
    m = init(m, single(sh))
    sh = shape(m, sh)
    push!(chain, m)
  end
  return chain, sh
end

type Chain <: Model
  layers::Vector{Any}
  shape
  function Chain(ms...)
    ms, shape = inferchain(ms)
    return new(ms, shape)
  end
end

@forward Chain.layers Base.getindex, Base.first, Base.last

(s::Chain)(x) = foldl((x, m) -> m(x), x, s.layers)
back!(s::Chain, Δ) = foldr((m, Δ) -> back!(m, Δ), Δ, s.layers)
update!(s::Chain, η) = foreach(l -> update!(l, η), s.layers)

graph(s::Chain) =
  foldl((v, m) -> vertex(m, v), constant(inputnode(1)), s.layers)

shape(c::Chain, in) = c.shape
@@ -1,51 +0,0 @@
export Recurrent, GatedRecurrent, LSTM

@net type Recurrent
  Wxy; Wyy; by
  y
  function (x)
    y = tanh( x * Wxy + y{-1} * Wyy + by )
  end
end

Recurrent(in, out; init = initn) =
  Recurrent(init((in, out)), init((out, out)), init(out), init(out))

@net type GatedRecurrent
  Wxr; Wyr; br
  Wxu; Wyu; bu
  Wxh; Wyh; bh
  y
  function (x)
    reset = σ( x * Wxr + y{-1} * Wyr + br )
    update = σ( x * Wxu + y{-1} * Wyu + bu )
    y′ = tanh( x * Wxh + (reset .* y{-1}) * Wyh + bh )
    y = (1 .- update) .* y′ + update .* y{-1}
  end
end

GatedRecurrent(in, out; init = initn) =
  GatedRecurrent(vcat([[init((in, out)), init((out, out)), init(out)] for _ = 1:3]...)...,
                 zeros(Float32, out))

@net type LSTM
  Wxf; Wyf; bf
  Wxi; Wyi; bi
  Wxo; Wyo; bo
  Wxc; Wyc; bc
  y; state
  function (x)
    # Gates
    forget = σ( x * Wxf + y{-1} * Wyf + bf )
    input = σ( x * Wxi + y{-1} * Wyi + bi )
    output = σ( x * Wxo + y{-1} * Wyo + bo )
    # State update and output
    state′ = tanh( x * Wxc + y{-1} * Wyc + bc )
    state = forget .* state{-1} + input .* state′
    y = output .* tanh(state)
  end
end

LSTM(in, out; init = initn) =
  LSTM(vcat([[init((in, out)), init((out, out)), init((1, out))] for _ = 1:4]...)...,
       zeros(Float32, out), zeros(Float32, out))
@@ -1,47 +0,0 @@
export Input

typealias Dims{N} NTuple{N,Int}

dims(d::Dims) = d

dims(i...) = (i...,)

single(i) = i
single(i::Dims) = length(i) == 1 ? first(i) : i

# Shim for kicking off shape inference

type ShapeError <: Exception
  layer
  shape
end

type Input{N} <: Model
  dims::Dims{N}
end

Input(i...) = Input(dims(i...))

(::Input)(x) = x
back!(::Input, Δ, x) = Δ

# Initialise placeholder

type Init{F}
  f::F
end

init(i::Init, input...) = i.f(input...)
init(m, input...) = m

# Shape inference API

shape(x, in) = in

shape(i::Input, _) = i.dims

# Implementation for bundled layers

shape(d::Affine, _) = length(state(d.b)) # TODO: could perhaps infer this

Affine(out::Integer) = Init(in::Integer -> Affine(in, out))
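The `Init` placeholder above is what lets the README write `Affine(128)` without an input size: the one-argument `Affine` constructor returns an `Init` closure, and `Chain`'s `inferchain` later calls `init(m, single(sh))` with the shape propagated from the previous layer. A hedged sketch of that flow, assuming the definitions above:

```julia
# Hedged sketch of deferred initialisation via the Init placeholder.
layer = Affine(128)    # an Init closure; no weights allocated yet
m = init(layer, 784)   # what inferchain does once the input size is known
shape(m, 784)          # => 128, read back from the length of m.b
```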
@@ -1,44 +0,0 @@
export Conv2D, MaxPool, Reshape

type Conv2D <: Model
  filter::Param{Array{Float32,4}} # [height, width, inchans, outchans]
  stride::Dims{2}
end

Conv2D(size; in = 1, out = 1, stride = (1,1), init = initn) =
  Conv2D(param(initn(size..., in, out)), stride)

shape(c::Conv2D, in::Dims{2}) =
  (map(i -> (in[i]-size(c.filter,i))÷c.stride[i]+1, (1,2))..., size(c.filter, 4))

shape(c::Conv2D, in::Dims{3}) =
  shape(c, (in[1],in[2]))

type MaxPool <: Model
  size::Dims{2}
  stride::Dims{2}
end

MaxPool(size; stride = (1,1)) =
  MaxPool(size, stride)

shape(c::MaxPool, in::Dims{2}) =
  map(i -> (in[i]-c.size[i])÷c.stride[i]+1, (1,2))

shape(c::MaxPool, in::Dims{3}) =
  (shape(c, (in[1],in[2]))..., in[3])

shape(c::MaxPool, in) = throw(ShapeError(c, in))

immutable Reshape{N}
  dims::Dims{N}
end

Reshape(dims::Integer...) = Reshape(dims)

function shape(r::Reshape, dims)
  prod(dims) == prod(r.dims) || throw(ShapeError(r, dims))
  return r.dims
end

shape(r::Reshape, ::Void) = r.dims
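These shape formulas are what make the LeNet example's `Input(12,12,20)` line up with the first convolutional block. A worked example (mine, following the definitions above) for a 28×28 greyscale input:

```julia
# Hedged shape-inference walkthrough for the shims above.
c = Conv2D((5,5), out = 20)        # 5×5 filters, 20 output channels, stride (1,1)
p = MaxPool((2,2), stride = (2,2))

shape(c, (28, 28))      # => (24, 24, 20), since (28 - 5) ÷ 1 + 1 == 24
shape(p, (24, 24, 20))  # => (12, 12, 20), since (24 - 2) ÷ 2 + 1 == 12
```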
src/model.jl: 111 lines removed
@@ -1,111 +0,0 @@
export Model, back!, update!, param

# Basic model API

"""
    (m::Model)(X...) => Y

A "model" is a function with state. For example, a logistic regression is the
function

    x -> σ(x * W + b)

where `W` and `b` are a trainable matrix and vector of weights respectively. The
`Model` abstract type is used loosely; in general the concept of a model is
closer to a protocol, and models don't need to inherit from this type. Normal
Julia functions are models with 0 parameters, for example.
"""
abstract Model

"""
    back!(m::Model, ΔY, X...) => ΔX

Backpropagate the gradient `ΔY` through the model `m`, accumulating the
gradients of any parameters. Returns the gradient of the input `X`. Gradients
may be arrays or tuples of arrays (for multiple inputs/outputs).
"""
back!(m::Model, Δ, xs...) = error("Backprop not implemented for $(typeof(m))")

"""
    update!(m::Model, η) => m

Update the parameters of the model `m` using the accumulated gradients from
`back!`, using the learning rate `η`.
"""
update!(m, η) = m

"""
    graph(m::Model) => ::IVertex{Any} | nothing

Returns the graph representation of the model, if any. Most models are built
from lower-level components and can simply implement this method to get most of
Flux's functionality. If this method isn't available, functionality like
backpropagation or conversion for backend must be implemented on a case-by-case
basis. Alternatively, one can implement this method and override individual
methods as necessary.
"""
graph(m) = nothing

# Model parameters

"""
A `Param` object stores a parameter array along with an accumulated delta to
that array. When converting to backends like TensorFlow, identical `Param`s will
result in identical variable objects, making model reuse trivial.
"""
type Param{T}
  x::T
  Δx::T
end

"""
    param(x::T) => ::Param{T}

Convenience method for creating a `Param` object for a given array.
"""
param(x) = Param(x, zero(x))

state(p::Param) = p.x

"""
    accumulate!(p::Param, Δ) => p

Accumulates the update `Δ` on `p`. The value of `p` won't change until
`update!`.
"""
function accumulate!(p::Param, Δ)
  p.Δx += Δ
  return p
end

"""
    update!(p::Param)

Apply the accumulated updates to the value of the parameter.
"""
function update!(p::Param, η)
  p.x .-= p.Δx .* η
  p.Δx[:] = 0
  return p
end

state(x) = x
accumulate!(x, Δ) = x

@forward Param.x Base.size

function Base.show(io::IO, p::Param)
  print(io, "Param", size(p.x))
end

# Anonymous models

export Capacitor

type Capacitor <: Model
  graph::IVertex{Any}
end

(m::Capacitor)(xs...) = interpret(reifyparams(m.graph), xs...)

graph(cap::Capacitor) = cap.graph
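The docstrings above describe `Model` as a protocol rather than a base type. As an illustrative sketch (not part of the commit), a hand-written layer only needs a call method plus `back!`/`update!` defined over its `Param`s:

```julia
# Hedged sketch of a layer implementing the Model protocol by hand (Julia 0.5 syntax).
type Scale <: Model
  w::Param{Vector{Float64}}
end

Scale(n::Integer) = Scale(param(ones(n)))

(m::Scale)(x) = m.w.x .* x        # forward pass

function back!(m::Scale, Δ, x)
  accumulate!(m.w, Δ .* x)        # gradient w.r.t. the parameter
  return Δ .* m.w.x               # gradient w.r.t. the input
end

update!(m::Scale, η) = (update!(m.w, η); m)
```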
src/utils.jl: 30 lines removed
@@ -1,30 +0,0 @@
export AArray

const AArray = AbstractArray

initn(dims...) = randn(Float32, dims...)/10

function train!(m, train, test = []; epoch = 1, batch = 10, η = 0.1)
  i = 0
  Δ = zeros(length(train[1][2]))
  for _ in 1:epoch
    @progress for (x, y) in train
      i += 1
      pred = m(x)
      any(isnan, pred) && error("NaN")
      err = mse!(Δ, pred, y)
      back!(m, Δ, x)
      i % batch == 0 && update!(m, η)
      i % 1000 == 0 && @show accuracy(m, test)
    end
  end
  return m
end

function accuracy(m, data)
  correct = 0
  for (x, y) in data
    onecold(m(x)) == onecold(y) && (correct += 1)
  end
  return correct/length(data)
end
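Since the docstring in src/model.jl notes that plain functions count as models, `accuracy` can be exercised without any trained weights; a hedged sketch with hypothetical predictions (not from the commit):

```julia
# Hedged sketch of the accuracy helper above.
data = [([0.1, 0.8, 0.1], [0, 1, 0]),    # predicted 2, labelled 2: correct
        ([0.7, 0.2, 0.1], [0, 0, 1])]    # predicted 1, labelled 3: wrong
passthrough(x) = x                       # stand-in "model" that returns its input
accuracy(passthrough, data)              # => 0.5
```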
@@ -1,10 +0,0 @@
xs = randn(10)' # TODO: batching semantics

d = Affine(10, 20)

@test d(xs) == xs*d.W.x + d.b.x

let
  @capture(syntax(d), _Frame(_Line(x_[1] * W_ + b_)))
  @test isa(x, Input) && isa(W, Param) && isa(b, Param)
end
@@ -1,10 +0,0 @@
using Flux, DataFlow, MacroTools, Base.Test
using Flux: graph, Param
using DataFlow: Input, Line, Frame

syntax(v::Vertex) = prettify(DataFlow.syntax(v))
syntax(x) = syntax(graph(x))

include("basic.jl")
include("recurrent.jl")
include("backend.jl")