will sort these out later
This commit is contained in: parent b17eb78e0a, commit 4604e9f515
@@ -1,24 +0,0 @@
using Flux, MNIST
using Flux: accuracy, onehot, tobatch

data = [(trainfeatures(i), onehot(trainlabel(i), 0:9)) for i = 1:60_000]
train = data[1:50_000]
test = data[50_001:60_000]

m = @Chain(
  Input(784),
  Affine(128), relu,
  Affine( 64), relu,
  Affine( 10), softmax)

# Convert to MXNet
model = mxnet(m)

# An example prediction pre-training
model(tobatch(data[1][1]))

Flux.train!(model, train, η = 1e-3,
            cb = [()->@show accuracy(m, test)])

# An example prediction post-training
model(tobatch(data[1][1]))
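For reference, `onehot(trainlabel(i), 0:9)` encodes a digit label as a length-10 indicator vector. A minimal standalone equivalent (an illustrative definition, not Flux's actual implementation):

    onehot(label, labels) = Float64.(labels .== label)
    onehot(3, 0:9)  # ten elements, with a 1.0 in the slot for digit 3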
@@ -1,41 +0,0 @@
using Flux
using Flux: onehot, logloss, unsqueeze
using Flux.Batches: Batch, tobatch, seqs, chunk
import StatsBase: wsample

nunroll = 50
nbatch = 50

encode(input) = seqs((onehot(ch, alphabet) for ch in input), nunroll)

cd(@__DIR__)
input = readstring("shakespeare_input.txt");
alphabet = unique(input)
N = length(alphabet)

Xs = (Batch(ss) for ss in zip(encode.(chunk(input, 50))...))
Ys = (Batch(ss) for ss in zip(encode.(chunk(input[2:end], 50))...))

model = Chain(
  LSTM(N, 256),
  LSTM(256, 256),
  Affine(256, N),
  softmax)

m = mxnet(unroll(model, nunroll))

eval = tobatch.(first.(drop.((Xs, Ys), 5)))
evalcb = () -> @show logloss(m(eval[1]), eval[2])

# @time Flux.train!(m, zip(Xs, Ys), η = 0.001, loss = logloss, cb = [evalcb], epoch = 10)

function sample(model, n, temp = 1)
  s = [rand(alphabet)]
  m = unroll1(model)
  for i = 1:n-1
    push!(s, wsample(alphabet, softmax(m(unsqueeze(onehot(s[end], alphabet)))./temp)[1,:]))
  end
  return string(s...)
end

# s = sample(model[1:end-1], 100)
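Two notes on the above. First, `chunk(input, 50)` splits the corpus into 50 parallel streams, which `zip` walks in lockstep so each `Batch` advances every stream by one `nunroll`-length window. Second, `sample` divides the network output by `temp` before the final `softmax` (hence it is called on `model[1:end-1]`, with the softmax layer dropped), so low temperatures sharpen the distribution. A standalone sketch of the temperature effect, independent of Flux:

    softmax(xs) = exp.(xs) ./ sum(exp.(xs))
    logits = [1.0, 2.0, 3.0]
    softmax(logits ./ 2.0)   # temp > 1: flatter distribution, more varied samples
    softmax(logits ./ 0.5)   # temp < 1: sharper distribution, more conservative samples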
@@ -1,35 +0,0 @@
using Flux, MXNet

Flux.loadmx()

conv1 = Chain(
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = @Chain(
  Input(28,28,1),
  conv1, conv2,
  flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain MXNet, following
# https://github.com/dmlc/MXNet.jl/blob/master/examples/mnist/lenet.jl

batch_size = 100
include(Pkg.dir("MXNet", "examples", "mnist", "mnist-data.jl"))
train_provider, eval_provider = get_mnist_providers(batch_size; flat=false)

model = mx.FeedForward(lenet)

mx.infer_shape(model.arch, data = (28, 28, 1, 100))

optimizer = mx.SGD(lr=0.05, momentum=0.9, weight_decay=0.00001)

mx.fit(model, optimizer, train_provider, n_epoch=1, eval_data=eval_provider)
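The `infer_shape` call above can be sanity-checked by hand: each valid 5×5 convolution trims 4 pixels and each 2×2 pool halves the sides (assuming the no-padding defaults these layers appear to use):

    conv(n, k) = n - k + 1      # valid convolution output size
    pool(n, k) = n ÷ k          # non-overlapping max-pool
    h = pool(conv(28, 5), 2)    # conv1: 28 -> 24 -> 12
    h = pool(conv(h, 5), 2)     # conv2: 12 -> 8 -> 4
    h * h * 50                  # flattened features into Affine(500): 800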
@@ -1,54 +0,0 @@
using Flux, Juno

conv1 = Chain(
  Conv2D((5,5), out = 20), tanh,
  MaxPool((2,2), stride = (2,2)))

conv2 = Chain(
  Conv2D((5,5), in = 20, out = 50), tanh,
  MaxPool((2,2), stride = (2,2)))

lenet = @Chain(
  Input(28,28,1),
  conv1, conv2,
  flatten,
  Affine(500), tanh,
  Affine(10), softmax)

#--------------------------------------------------------------------------------

# Now we can continue exactly as in plain TensorFlow, following
# https://github.com/malmaud/TensorFlow.jl/blob/master/examples/mnist_full.jl
# (taking only the training and cost logic, not the graph building steps)

using TensorFlow, Distributions

include(Pkg.dir("TensorFlow", "examples", "mnist_loader.jl"))
loader = DataLoader()

session = Session(Graph())

x = placeholder(Float32)
y′ = placeholder(Float32)
y = Tensor(lenet, x)

cross_entropy = reduce_mean(-reduce_sum(y′.*log(y), reduction_indices=[2]))

train_step = train.minimize(train.AdamOptimizer(1e-4), cross_entropy)

accuracy = reduce_mean(cast(indmax(y, 2) .== indmax(y′, 2), Float32))

run(session, global_variables_initializer())

@progress for i in 1:1000
  batch = next_batch(loader, 50)
  if i%100 == 1
    train_accuracy = run(session, accuracy, Dict(x=>batch[1], y′=>batch[2]))
    info("step $i, training accuracy $train_accuracy")
  end
  run(session, train_step, Dict(x=>batch[1], y′=>batch[2]))
end

testx, testy = load_test_set()
test_accuracy = run(session, accuracy, Dict(x=>testx, y′=>testy))
info("test accuracy $test_accuracy")
@@ -1,50 +0,0 @@
# Based on https://arxiv.org/abs/1409.0473

using Flux
using Flux: Batch, Seq, param, flip, stateless, broadcastto, ∘

Nbatch = 3   # Number of phrases to batch together
Nphrase = 5  # The length of (padded) phrases
Nalpha = 7   # The size of the token vector
Nhidden = 10 # The size of the hidden state

# A recurrent model which takes a token and returns a context-dependent
# annotation.

forward = LSTM(Nalpha, Nhidden÷2)
backward = flip(LSTM(Nalpha, Nhidden÷2))
encoder = @net token -> hcat(forward(token), backward(token))

alignnet = Affine(2Nhidden, 1)
align = @net (s, t) -> alignnet(hcat(broadcastto(s, (Nbatch, 1)), t))

# A recurrent model which takes a sequence of annotations, attends, and returns
# a predicted output token.

recur = unroll1(LSTM(Nhidden, Nhidden)).model
state = param(zeros(1, Nhidden))
y = param(zeros(1, Nhidden))
toalpha = Affine(Nhidden, Nalpha)

decoder = @net function (tokens)
  energies = map(token -> exp.(align(state{-1}, token)), tokens)
  weights = map(e -> e ./ sum(energies), energies)
  context = sum(map(∘, weights, tokens))
  (y, state), _ = recur((y{-1},state{-1}), context)
  return softmax(toalpha(y))
end

# Building the full model

a, b = rand(Nbatch, Nalpha), rand(Nbatch, Nalpha)

model = @Chain(
  stateless(unroll(encoder, Nphrase)),
  @net(x -> repeated(x, Nphrase)),
  stateless(unroll(decoder, Nphrase)))

model = mxnet(Flux.SeqModel(model, Nphrase))

xs = Batch([Seq(rand(Nalpha) for i = 1:Nphrase) for i = 1:Nbatch])

model(xs)
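The decoder above implements the paper's attention step: `energies` are exponentiated alignment scores of each annotation against the previous decoder state, `weights` normalise them into a softmax over positions, and `context` is the attention-weighted sum fed to the recurrent cell. A sketch of the same arithmetic on plain arrays (toy sizes; the dot-product `score` is a stand-in for the `align` network):

    tokens = [rand(1, 10) for _ = 1:5]            # annotations, one per position
    s = rand(1, 10)                               # previous decoder state
    score(s, h) = sum(s .* h)                     # stand-in alignment score
    energies = [exp(score(s, h)) for h in tokens]
    weights = energies ./ sum(energies)           # softmax over positions
    context = sum(weights .* tokens)              # weighted sum of annotations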