Added initial files

This commit is contained in:
Eddie Cueto 2023-05-24 20:20:51 +01:00
parent 2f99a87f77
commit ebf272f722
3 changed files with 447 additions and 0 deletions

163
BNN/BNN.jl Executable file
View File

@ -0,0 +1,163 @@
#=
network_shape = [
((5, 5), 1=>6, relu),
((2, 2)),
((5, 5), 6=>16, relu),
((2, 2)),
Flux.flatten,
(256 => 120, relu),
(120 => 84, relu),
(84 => 10),
];
=#
#=
lenet = Chain(
Conv((5, 5), 1=>6, relu),
MaxPool((2, 2)),
Conv((5, 5), 6=>16, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(256 => 120, relu),
Dense(120 => 84, relu),
Dense(84 => 10),
)
=#
#########################################################
# Import libraries.
using Turing, Flux, Plots, Random, ReverseDiff, MLDatasets
include("./aux_func.jl")
# Hide sampling progress.
Turing.setprogress!(false);
# Use reverse_diff due to the number of parameters in neural networks.
Turing.setadbackend(:reversediff)
train_mnist, test_mnist = get_data("mnist")
#train_cifar, test_cifar = get_data("cifar")
# Number of points to generate.
#N = 80;
#M = round(Int, N / 4);
Random.seed!(1234)
#=
# Generate artificial data.
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt1s = Array([[x1s[i] + 0.5; x2s[i] + 0.5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt1s, Array([[x1s[i] - 5; x2s[i] - 5] for i in 1:M]))
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt0s = Array([[x1s[i] + 0.5; x2s[i] - 5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt0s, Array([[x1s[i] - 5; x2s[i] + 0.5] for i in 1:M]))
# Store all the data for later.
xs = [xt1s; xt0s]
ts = [ones(2 * M); zeros(2 * M)]
# Plot data points.
function plot_data()
x1 = map(e -> e[1], xt1s)
y1 = map(e -> e[2], xt1s)
x2 = map(e -> e[1], xt0s)
y2 = map(e -> e[2], xt0s)
Plots.scatter(x1, y1; color="red", clim=(0, 1))
return Plots.scatter!(x2, y2; color="blue", clim=(0, 1))
end
plot_data()
=#
# Construct a neural network using Flux
lenet = Chain(
Conv((5, 5), 1=>6, relu),
MaxPool((2, 2)),
Conv((5, 5), 6=>16, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(256 => 120, relu),
Dense(120 => 84, relu),
Dense(84 => 10),
)
batches = loader(train_mnist);
xs = [];
ys = [];
for b in batches
push!(xs,b[1])
push!(ys,b[2])
end
#x1, y1 = first(loader(train_mnist)); # (28×28×1×64 Array{Float32, 3}, 10×64 OneHotMatrix(::Vector{UInt32}))
# Extract weights and a helper function to reconstruct NN from weights
parameters_initial, reconstruct = Flux.destructure(lenet);
tot_param = length(parameters_initial); # number of parameters in NN
# Perform inference.
N = 5000;
ch = sample(
bayes_nn(xs, ys, tot_params, reconstruct), HMC(0.05, 4), N
);
# Extract all weight and bias parameters.
theta = MCMCChains.group(ch, :parameters).value;
# Plot the data we have.
plot_data()
# Find the index that provided the highest log posterior in the chain.
_, i = findmax(ch[:lp]);
# Extract the max row value from i.
i = i.I[1];
# Plot the posterior distribution with a contour plot
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)
# Plot the average prediction.
plot_data()
n_end = 1500;
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_predict([x1, x2], theta, n_end)[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)
# Number of iterations to plot.
n_end = 500;
anim = @gif for i in 1:n_end
plot_data()
Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range]
contour!(x1_range, x2_range, Z; title="Iteration $i", clim=(0, 1))
end every 5

222
FNN/FNN.jl Executable file
View File

@ -0,0 +1,222 @@
# Classification of MNIST dataset using a convolutional network,
# which is a variant of the original LeNet from 1998.
# This example uses a GPU if you have one.
# And demonstrates how to save model state.
using MLDatasets, Flux, JLD2, CUDA # this will install everything if necc.
folder = "runs" # sub-directory in which to save
isdir(folder) || mkdir(folder)
filename = joinpath(folder, "lenet.jld2")
#===== DATA =====#
# Calling MLDatasets.MNIST() will dowload the dataset if necessary,
# and return a struct containing it.
# It takes a few seconds to read from disk each time, so do this once:
train_data = MLDatasets.MNIST() # i.e. split=:train
test_data = MLDatasets.MNIST(split=:test)
# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
# Flux needs a 4D array, with the 3rd dim for channels -- here trivial, grayscale.
# Combine the reshape needed with other pre-processing:
function loader(data::MNIST=train_data; batchsize::Int=64)
x4dim = reshape(data.features, 28,28,1,:) # insert trivial channel dim
yhot = Flux.onehotbatch(data.targets, 0:9) # make a 10×60000 OneHotMatrix
Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true) |> gpu
end
loader() # returns a DataLoader, with first element a tuple like this:
x1, y1 = first(loader()); # (28×28×1×64 Array{Float32, 3}, 10×64 OneHotMatrix(::Vector{UInt32}))
# If you are using a GPU, these should be CuArray{Float32, 3} etc.
# If not, the `gpu` function does nothing (except complain the first time).
#===== MODEL =====#
# LeNet has two convolutional layers, and our modern version has relu nonlinearities.
# After each conv layer there's a pooling step. Finally, there are some fully connected layers:
lenet = Chain(
Conv((5, 5), 1=>6, relu),
MaxPool((2, 2)),
Conv((5, 5), 6=>16, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(256 => 120, relu),
Dense(120 => 84, relu),
Dense(84 => 10),
) |> gpu
# Notice that most of the parameters are in the final Dense layers.
y1hat = lenet(x1) # try it out
sum(softmax(y1hat); dims=1)
# Each column of softmax(y1hat) may be thought of as the network's probabilities
# that an input image is in each of 10 classes. To find its most likely answer,
# we can look for the largest output in each column, without needing softmax first.
# At the moment, these don't resemble the true values at all:
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))
#===== METRICS =====#
# We're going to log accuracy and loss during training. There's no advantage to
# calculating these on minibatches, since MNIST is small enough to do it at once.
using Statistics: mean # standard library
function loss_and_accuracy(model, data::MNIST=test_data)
(x,y) = only(loader(data; batchsize=length(data))) # make one big batch
= model(x)
loss = Flux.logitcrossentropy(, y) # did not include softmax in the model
acc = round(100 * mean(Flux.onecold() .== Flux.onecold(y)); digits=2)
(; loss, acc, split=data.split) # return a NamedTuple
end
@show loss_and_accuracy(lenet); # accuracy about 10%, before training
#===== TRAINING =====#
# Let's collect some hyper-parameters in a NamedTuple, just to write them in one place.
# Global variables are fine -- we won't access this from inside any fast loops.
settings = (;
eta = 3e-4, # learning rate
lambda = 1e-2, # for weight decay
batchsize = 128,
epochs = 10,
)
train_log = []
# Initialise the storage needed for the optimiser:
opt_rule = OptimiserChain(WeightDecay(settings.lambda), Adam(settings.eta))
opt_state = Flux.setup(opt_rule, lenet);
for epoch in 1:settings.epochs
# @time will show a much longer time for the first epoch, due to compilation
@time for (x,y) in loader(batchsize=settings.batchsize)
grads = Flux.gradient(m -> Flux.logitcrossentropy(m(x), y), lenet)
Flux.update!(opt_state, lenet, grads[1])
end
# Logging & saving, but not on every epoch
if epoch % 2 == 1
loss, acc, _ = loss_and_accuracy(lenet)
test_loss, test_acc, _ = loss_and_accuracy(lenet, test_data)
@info "logging:" epoch acc test_acc
nt = (; epoch, loss, acc, test_loss, test_acc) # make a NamedTuple
push!(train_log, nt)
end
if epoch % 5 == 0
JLD2.jldsave(filename; lenet_state = Flux.state(lenet) |> cpu)
println("saved to ", filename, " after ", epoch, " epochs")
end
end
@show train_log;
# We can re-run the quick sanity-check of predictions:
y1hat = lenet(x1)
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))
#===== INSPECTION =====#
using ImageCore, ImageInTerminal
xtest, ytest = only(loader(test_data, batchsize=length(test_data)));
# There are many ways to look at images, you won't need ImageInTerminal if working in a notebook.
# ImageCore.Gray is a special type, whick interprets numbers between 0.0 and 1.0 as shades:
xtest[:,:,1,5] .|> Gray |> transpose |> cpu
Flux.onecold(ytest, 0:9)[5] # true label, should match!
# Let's look for the image whose classification is least certain.
# First, in each column of probabilities, ask for the largest one.
# Then, over all images, ask for the lowest such probability, and its index.
ptest = softmax(lenet(xtest))
max_p = maximum(ptest; dims=1)
_, i = findmin(vec(max_p))
xtest[:,:,1,i] .|> Gray |> transpose |> cpu
Flux.onecold(ytest, 0:9)[i] # true classification
ptest[:,i] # probabilities of all outcomes
Flux.onecold(ptest[:,i], 0:9) # uncertain prediction
#===== ARRAY SIZES =====#
# A layer like Conv((5, 5), 1=>6) takes 5x5 patches of an image, and matches them to each
# of 6 different 5x5 filters, placed at every possible position. These filters are here:
Conv((5, 5), 1=>6).weight |> summary # 5×5×1×6 Array{Float32, 4}
# This layer can accept any size of image; let's trace the sizes with the actual input:
#=
julia> x1 |> size
(28, 28, 1, 64)
julia> lenet[1](x1) |> size # after Conv((5, 5), 1=>6, relu),
(24, 24, 6, 64)
julia> lenet[1:2](x1) |> size # after MaxPool((2, 2))
(12, 12, 6, 64)
julia> lenet[1:3](x1) |> size # after Conv((5, 5), 6 => 16, relu)
(8, 8, 16, 64)
julia> lenet[1:4](x1) |> size # after MaxPool((2, 2))
(4, 4, 16, 64)
julia> lenet[1:5](x1) |> size # after Flux.flatten
(256, 64)
=#
# Flux.flatten is just reshape, preserving the batch dimesion (64) while combining others (4*4*16).
# This 256 must match the Dense(256 => 120). Here is how to automate this, with Flux.outputsize:
lenet2 = Flux.@autosize (28, 28, 1, 1) Chain(
Conv((5, 5), 1=>6, relu),
MaxPool((2, 2)),
Conv((5, 5), _=>16, relu),
MaxPool((2, 2)),
Flux.flatten,
Dense(_ => 120, relu),
Dense(_ => 84, relu),
Dense(_ => 10),
)
# Check that this indeed accepts input the same size as above:
@show lenet2(cpu(x1)) |> size;
#===== LOADING =====#
# During training, the code above saves the model state to disk. Load the last version:
loaded_state = JLD2.load(filename, "lenet_state");
# Now you would normally re-create the model, and copy all parameters into that.
# We can use lenet2 from just above:
Flux.loadmodel!(lenet2, loaded_state)
# Check that it now agrees with the earlier, trained, model:
@show lenet2(cpu(x1)) cpu(lenet(x1);
#===== THE END =====#

62
aux_func.jl Executable file
View File

@ -0,0 +1,62 @@
#using Turing, MLDatasets
# Function to get datasets
function get_data(name::String)
if name == "mnist"
train_data_mnist = MLDatasets.MNIST(;Tx=Float32, split=:train)
test_data_mnist = MLDatasets.MNIST(;Tx=Float32, split=:test)
return train_data_mnist, test_data_mnist
elseif name == "cifar"
train_data_cifar = MLDatasets.CIFAR10(;Tx=Float32, split=:train)
test_data_cifar = MLDatasets.CIFAR10(;Tx=Float32, split=:test)
return train_data_cifar, test_data_cifar
else
println("That is not a valid dataset")
end
end
function loader(data::MNIST=train_data; batchsize::Int=64)
x4dim = reshape(data.features, 28,28,1,:) # insert trivial channel dim
yhot = Flux.onehotbatch(data.targets, 0:9) # make a 10×60000 OneHotMatrix
Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true) #|> gpu
end
# Create a regularization term and a Gaussian prior variance term.
alpha = 0.09;
sig = sqrt(1.0 / alpha);
# Specify the probabilistic model.
@model function bayes_nn(xs, ys, nparameters, reconstruct)
# Create the weight and bias vector.
parameters ~ MvNormal(zeros(nparameters), sig .* ones(nparameters))
# Construct NN from parameters
nn = reconstruct(parameters)
# Forward NN to make predictions
preds = []
for x in xs
push!(preds,nn(x))
end
# Observe each prediction.
for p in preds
col, row = size(p)
tempy = []
for r in 1:row
end
end
for i in 1:length(ys)
ys[i] ~ Multinomial(1,preds[i])
end
end;
# A helper to create NN from weights `theta` and run it through data `x`
nn_forward(x, theta) = reconstruct(theta)(x)
# Return the average predicted value across
# multiple weights.
function nn_predict(x, theta, num)
return mean([nn_forward(x, theta[i, :])[1] for i in 1:10:num])
end;