will sort these out later

Mike J Innes 2017-08-18 00:33:39 +01:00
parent b17eb78e0a
commit 4604e9f515
5 changed files with 0 additions and 204 deletions


@@ -1,24 +0,0 @@
using Flux, MNIST
using Flux: accuracy, onehot, tobatch

data = [(trainfeatures(i), onehot(trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]

m = @Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

# Convert to MXNet
model = mxnet(m)

# An example prediction pre-training
model(tobatch(data[1][1]))

Flux.train!(model, train, η = 1e-3,
            cb = [() -> @show accuracy(m, test)])

# An example prediction post-training
model(tobatch(data[1][1]))
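
# A quick orientation on the data prepared above (a rough sketch, using only
# names already in this file: `trainfeatures`/`trainlabel` from MNIST.jl and
# `onehot` from the Flux import):
x, y = data[1]        # x: a flattened 28×28 image (784 floats), y: a one-hot vector over 0:9
length(x) == 28 * 28  # matches the Input(784) layer of the chain
indmax(y) - 1         # recovers the digit encoded by the one-hot label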


@@ -1,41 +0,0 @@
using Flux
using Flux: onehot, logloss, unsqueeze
using Flux.Batches: Batch, tobatch, seqs, chunk

import StatsBase: wsample

nunroll = 50
nbatch = 50

encode(input) = seqs((onehot(ch, alphabet) for ch in input), nunroll)

cd(@__DIR__)
input = readstring("shakespeare_input.txt");
alphabet = unique(input)
N = length(alphabet)

Xs = (Batch(ss) for ss in zip(encode.(chunk(input, 50))...))
Ys = (Batch(ss) for ss in zip(encode.(chunk(input[2:end], 50))...))

model = Chain(
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)

m = mxnet(unroll(model, nunroll))

eval = tobatch.(first.(drop.((Xs, Ys), 5)))
evalcb = () -> @show logloss(m(eval[1]), eval[2])

# @time Flux.train!(m, zip(Xs, Ys), η = 0.001, loss = logloss, cb = [evalcb], epoch = 10)

function sample(model, n, temp = 1)
  s = [rand(alphabet)]
  m = unroll1(model)
  for i = 1:n-1
    push!(s, wsample(alphabet, softmax(m(unsqueeze(onehot(s[end], alphabet)))./temp)[1,:]))
  end
  return string(s...)
end

# s = sample(model[1:end-1], 100)
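
# Note on `temp` in `sample` above: the scores are divided by the temperature
# before the softmax, so temp < 1 sharpens the distribution and temp > 1
# flattens it. A plain-Julia sketch on made-up scores:
scores = [1.0, 2.0, 4.0]
exp.(scores ./ 0.5) ./ sum(exp.(scores ./ 0.5))  # ≈ [0.002, 0.018, 0.980]
exp.(scores ./ 2.0) ./ sum(exp.(scores ./ 2.0))  # ≈ [0.140, 0.231, 0.629]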


@@ -1,35 +0,0 @@
using Flux, MXNet
Flux.loadmx()

conv1 = Chain(
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = @Chain(
  Input(28,28,1),
  conv1, conv2,
  flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain MXNet, following
# https://github.com/dmlc/MXNet.jl/blob/master/examples/mnist/lenet.jl

batch_size = 100
include(Pkg.dir("MXNet", "examples", "mnist", "mnist-data.jl"))
train_provider, eval_provider = get_mnist_providers(batch_size; flat=false)

model = mx.FeedForward(lenet)

mx.infer_shape(model.arch, data = (28, 28, 1, 100))

optimizer = mx.SGD(lr=0.05, momentum=0.9, weight_decay=0.00001)

mx.fit(model, optimizer, train_provider, n_epoch=1, eval_data=eval_provider)
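
# To inspect the trained network, MXNet.jl's prediction API can be used on the
# wrapped model directly (a sketch following the MXNet.jl MNIST examples;
# `model` and `eval_provider` are as set up above):
probs = mx.predict(model, eval_provider)                    # one column of class probabilities per image
preds = [indmax(probs[:, i]) - 1 for i = 1:size(probs, 2)]  # predicted digits 0–9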


@@ -1,54 +0,0 @@
using Flux, Juno

conv1 = Chain(
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = @Chain(
  Input(28,28,1),
  conv1, conv2,
  flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain TensorFlow, following
# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl
# (taking only the training and cost logic, not the graph building steps)

using TensorFlow, Distributions

include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl"))
loader = DataLoader()

session = Session(Graph())

x = placeholder(Float32)
y′ = placeholder(Float32) # a separate placeholder for the one-hot labels
y = Tensor(lenet, x)      # the model's predictions

cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2]))

train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy)

accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32))

run(session, global_variables_initializer())

@progress for i in 1:1000
  batch = next_batch(loader, 50)
  if i%100 == 1
    train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2]))
    info("step $i, training accuracy $train_accuracy")
  end
  run(session, train_step, Dict(x=>batch[1], y′=>batch[2]))
end

testx, testy = load_test_set()
test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy))
info("test accuracy $test_accuracy")


@@ -1,50 +0,0 @@
# Based on https://arxiv.org/abs/1409.0473

using Flux
using Flux: Batch, Seq, param, flip, stateless, broadcastto

Nbatch  = 3  # Number of phrases to batch together
Nphrase = 5  # The length of (padded) phrases
Nalpha  = 7  # The size of the token vector
Nhidden = 10 # The size of the hidden state

# A recurrent model which takes a token and returns a context-dependent
# annotation.

forward  = LSTM(Nalpha, Nhidden÷2)
backward = flip(LSTM(Nalpha, Nhidden÷2))
encoder  = @net token -> hcat(forward(token), backward(token))

alignnet = Affine(2Nhidden, 1)
align = @net (s, t) -> alignnet(hcat(broadcastto(s, (Nbatch, 1)), t))

# A recurrent model which takes a sequence of annotations, attends, and returns
# a predicted output token.

recur   = unroll1(LSTM(Nhidden, Nhidden)).model
state   = param(zeros(1, Nhidden))
y       = param(zeros(1, Nhidden))
toalpha = Affine(Nhidden, Nalpha)

decoder = @net function (tokens)
  energies = map(token -> exp.(align(state{-1}, token)), tokens)
  weights = map(e -> e ./ sum(energies), energies)
  context = sum(map((w, t) -> w .* t, weights, tokens)) # attention-weighted sum of the annotations
  (y, state), _ = recur((y{-1}, state{-1}), context)
  return softmax(toalpha(y))
end

# Building the full model

a, b = rand(Nbatch, Nalpha), rand(Nbatch, Nalpha)

model = @Chain(
  stateless(unroll(encoder, Nphrase)),
  @net(x -> repeated(x, Nphrase)),
  stateless(unroll(decoder, Nphrase)))

model = mxnet(Flux.SeqModel(model, Nphrase))

xs = Batch([Seq(rand(Nalpha) for i = 1:Nphrase) for i = 1:Nbatch])

model(xs)
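
# The attention step in `decoder` above, spelled out: each annotation gets an
# energy from `align`, the energies are normalised into weights, and the
# context is the weighted sum of the annotations. A plain-Julia sketch with
# made-up numbers (3 annotations of length 2):
annotations = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
energies = [exp(2.0), exp(0.5), exp(1.0)]
weights = energies ./ sum(energies)                            # ≈ [0.63, 0.14, 0.23]
context = sum(w .* t for (w, t) in zip(weights, annotations))  # ≈ [0.86, 0.37]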