Added initial files
This commit is contained in:
parent 2f99a87f77
commit ebf272f722

@@ -0,0 +1,163 @@
#=
network_shape = [
    ((5, 5), 1=>6, relu),
    ((2, 2)),
    ((5, 5), 6=>16, relu),
    ((2, 2)),
    Flux.flatten,
    (256 => 120, relu),
    (120 => 84, relu),
    (84 => 10),
];
=#

#=
lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
)
=#

#########################################################

# Import libraries.
using Turing, Flux, Plots, Random, ReverseDiff, MLDatasets
include("./aux_func.jl")

# Hide sampling progress.
Turing.setprogress!(false);

# Use reverse_diff due to the number of parameters in neural networks.
Turing.setadbackend(:reversediff)

train_mnist, test_mnist = get_data("mnist")
#train_cifar, test_cifar = get_data("cifar")

# Number of points to generate.
#N = 80;
#M = round(Int, N / 4);
Random.seed!(1234)

#=
# Generate artificial data.
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt1s = Array([[x1s[i] + 0.5; x2s[i] + 0.5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt1s, Array([[x1s[i] - 5; x2s[i] - 5] for i in 1:M]))

x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt0s = Array([[x1s[i] + 0.5; x2s[i] - 5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt0s, Array([[x1s[i] - 5; x2s[i] + 0.5] for i in 1:M]))

# Store all the data for later.
xs = [xt1s; xt0s]
ts = [ones(2 * M); zeros(2 * M)]

# Plot data points.
function plot_data()
    x1 = map(e -> e[1], xt1s)
    y1 = map(e -> e[2], xt1s)
    x2 = map(e -> e[1], xt0s)
    y2 = map(e -> e[2], xt0s)

    Plots.scatter(x1, y1; color="red", clim=(0, 1))
    return Plots.scatter!(x2, y2; color="blue", clim=(0, 1))
end

plot_data()
=#

# Construct a neural network using Flux.
lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
)

batches = loader(train_mnist);

xs = [];
ys = [];
for b in batches
    push!(xs, b[1])
    push!(ys, b[2])
end
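
# Each xs[i] is a 28×28×1×64 Array{Float32, 4} batch of images, and each ys[i]
# the matching 10×64 one-hot label matrix from the DataLoader above
# (the final batch may be smaller than 64).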

#x1, y1 = first(loader(train_mnist)); # (28×28×1×64 Array{Float32, 4}, 10×64 OneHotMatrix(::Vector{UInt32}))

# Extract weights and a helper function to reconstruct the NN from weights.
parameters_initial, reconstruct = Flux.destructure(lenet);

tot_param = length(parameters_initial); # number of parameters in the NN
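
# Sanity check (a sketch, assuming `xs` is non-empty as built above):
# `reconstruct` applied to the flat parameter vector should rebuild a network
# that matches `lenet` on a sample batch.
@assert reconstruct(parameters_initial)(xs[1]) ≈ lenet(xs[1])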

# Perform inference.
N = 5000;
ch = sample(
    bayes_nn(xs, ys, tot_param, reconstruct), HMC(0.05, 4), N
);

# Extract all weight and bias parameters.
theta = MCMCChains.group(ch, :parameters).value;

# NOTE: the plotting below follows the Turing tutorial's 2-D toy example and
# assumes the commented-out artificial data and `plot_data` defined above.

# Plot the data we have.
plot_data()

# Find the index that provided the highest log posterior in the chain.
_, i = findmax(ch[:lp]);

# Extract the max row value from i.
i = i.I[1];

# Plot the posterior distribution with a contour plot.
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)

# Plot the average prediction.
plot_data()

n_end = 1500;
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_predict([x1, x2], theta, n_end)[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)

# Number of iterations to plot.
n_end = 500;

anim = @gif for i in 1:n_end
    plot_data()
    Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range]
    contour!(x1_range, x2_range, Z; title="Iteration $i", clim=(0, 1))
end every 5

@@ -0,0 +1,222 @@
# Classification of MNIST dataset using a convolutional network,
# which is a variant of the original LeNet from 1998.

# This example uses a GPU if you have one,
# and demonstrates how to save model state.

using MLDatasets, Flux, JLD2, CUDA  # this will install everything if necessary

folder = "runs"  # sub-directory in which to save
isdir(folder) || mkdir(folder)
filename = joinpath(folder, "lenet.jld2")

#===== DATA =====#

# Calling MLDatasets.MNIST() will download the dataset if necessary,
# and return a struct containing it.
# It takes a few seconds to read from disk each time, so do this once:

train_data = MLDatasets.MNIST()  # i.e. split=:train
test_data = MLDatasets.MNIST(split=:test)

# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
# Flux needs a 4D array, with the 3rd dim for channels -- here trivial, grayscale.
# Combine the reshape needed with other pre-processing:

function loader(data::MNIST=train_data; batchsize::Int=64)
    x4dim = reshape(data.features, 28, 28, 1, :)  # insert trivial channel dim
    yhot = Flux.onehotbatch(data.targets, 0:9)    # make a 10×60000 OneHotMatrix
    Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true) |> gpu
end

loader()  # returns a DataLoader, with first element a tuple like this:

x1, y1 = first(loader());  # (28×28×1×64 Array{Float32, 4}, 10×64 OneHotMatrix(::Vector{UInt32}))

# If you are using a GPU, these should be CuArray{Float32, 4} etc.
# If not, the `gpu` function does nothing (except complain the first time).

#===== MODEL =====#

# LeNet has two convolutional layers, and our modern version has relu nonlinearities.
# After each conv layer there's a pooling step. Finally, there are some fully connected layers:

lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
) |> gpu

# Notice that most of the parameters are in the final Dense layers.
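
# A quick count (a sketch using Flux.params): the two Conv layers hold
# 156 + 2_416 weights and biases, while the three Dense layers hold
# 30_840 + 10_164 + 850, i.e. 41_854 of the 44_426 parameters in total.
sum(length, Flux.params(lenet))  # 44426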

y1hat = lenet(x1)  # try it out

sum(softmax(y1hat); dims=1)

# Each column of softmax(y1hat) may be thought of as the network's probabilities
# that an input image is in each of 10 classes. To find its most likely answer,
# we can look for the largest output in each column, without needing softmax first.
# At the moment, these don't resemble the true values at all:

@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))
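
# Since softmax is strictly increasing within each column, onecold of the raw
# logits picks the same classes as onecold of the probabilities:
@assert Flux.onecold(y1hat, 0:9) == Flux.onecold(softmax(y1hat), 0:9)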

#===== METRICS =====#

# We're going to log accuracy and loss during training. There's no advantage to
# calculating these on minibatches, since MNIST is small enough to do it at once.

using Statistics: mean  # standard library

function loss_and_accuracy(model, data::MNIST=test_data)
    (x, y) = only(loader(data; batchsize=length(data)))  # make one big batch
    ŷ = model(x)
    loss = Flux.logitcrossentropy(ŷ, y)  # did not include softmax in the model
    acc = round(100 * mean(Flux.onecold(ŷ) .== Flux.onecold(y)); digits=2)
    (; loss, acc, split=data.split)  # return a NamedTuple
end

@show loss_and_accuracy(lenet);  # accuracy about 10%, before training

#===== TRAINING =====#

# Let's collect some hyper-parameters in a NamedTuple, just to write them in one place.
# Global variables are fine -- we won't access this from inside any fast loops.

settings = (;
    eta = 3e-4,     # learning rate
    lambda = 1e-2,  # for weight decay
    batchsize = 128,
    epochs = 10,
)
train_log = []

# Initialise the storage needed for the optimiser:

opt_rule = OptimiserChain(WeightDecay(settings.lambda), Adam(settings.eta))
opt_state = Flux.setup(opt_rule, lenet);
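
# OptimiserChain applies its rules left to right: WeightDecay first adds
# lambda .* weight to each gradient (i.e. L2 regularisation), then Adam takes
# the step from the modified gradient.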

for epoch in 1:settings.epochs
    # @time will show a much longer time for the first epoch, due to compilation
    @time for (x, y) in loader(batchsize=settings.batchsize)
        grads = Flux.gradient(m -> Flux.logitcrossentropy(m(x), y), lenet)
        Flux.update!(opt_state, lenet, grads[1])
    end

    # Logging & saving, but not on every epoch
    if epoch % 2 == 1
        loss, acc, _ = loss_and_accuracy(lenet)
        test_loss, test_acc, _ = loss_and_accuracy(lenet, test_data)
        @info "logging:" epoch acc test_acc
        nt = (; epoch, loss, acc, test_loss, test_acc)  # make a NamedTuple
        push!(train_log, nt)
    end
    if epoch % 5 == 0
        JLD2.jldsave(filename; lenet_state = Flux.state(lenet) |> cpu)
        println("saved to ", filename, " after ", epoch, " epochs")
    end
end

@show train_log;

# We can re-run the quick sanity-check of predictions:
y1hat = lenet(x1)
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

#===== INSPECTION =====#

using ImageCore, ImageInTerminal

xtest, ytest = only(loader(test_data, batchsize=length(test_data)));

# There are many ways to look at images; you won't need ImageInTerminal if working in a notebook.
# ImageCore.Gray is a special type, which interprets numbers between 0.0 and 1.0 as shades:

xtest[:,:,1,5] .|> Gray |> transpose |> cpu

Flux.onecold(ytest, 0:9)[5]  # true label, should match!

# Let's look for the image whose classification is least certain.
# First, in each column of probabilities, ask for the largest one.
# Then, over all images, ask for the lowest such probability, and its index.

ptest = softmax(lenet(xtest))
max_p = maximum(ptest; dims=1)
_, i = findmin(vec(max_p))

xtest[:,:,1,i] .|> Gray |> transpose |> cpu

Flux.onecold(ytest, 0:9)[i]  # true classification
ptest[:,i]  # probabilities of all outcomes
Flux.onecold(ptest[:,i], 0:9)  # uncertain prediction

#===== ARRAY SIZES =====#

# A layer like Conv((5, 5), 1=>6) takes 5×5 patches of an image, and matches them to each
# of 6 different 5×5 filters, placed at every possible position. These filters are here:

Conv((5, 5), 1=>6).weight |> summary  # 5×5×1×6 Array{Float32, 4}
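
# With no padding, a 5×5 filter fits at 28 - 5 + 1 = 24 positions along each
# axis, which is why the first Conv layer maps 28×28 inputs to 24×24 below.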

# This layer can accept any size of image; let's trace the sizes with the actual input:

#=

julia> x1 |> size
(28, 28, 1, 64)

julia> lenet[1](x1) |> size  # after Conv((5, 5), 1=>6, relu),
(24, 24, 6, 64)

julia> lenet[1:2](x1) |> size  # after MaxPool((2, 2))
(12, 12, 6, 64)

julia> lenet[1:3](x1) |> size  # after Conv((5, 5), 6 => 16, relu)
(8, 8, 16, 64)

julia> lenet[1:4](x1) |> size  # after MaxPool((2, 2))
(4, 4, 16, 64)

julia> lenet[1:5](x1) |> size  # after Flux.flatten
(256, 64)

=#

# Flux.flatten is just reshape, preserving the batch dimension (64) while combining the others (4*4*16).
# This 256 must match the Dense(256 => 120). Here is how to automate this, with
# Flux.@autosize (which uses Flux.outputsize under the hood):

lenet2 = Flux.@autosize (28, 28, 1, 1) Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), _=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(_ => 120, relu),
    Dense(_ => 84, relu),
    Dense(_ => 10),
)

# Check that this indeed accepts input the same size as above:

@show lenet2(cpu(x1)) |> size;
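
# The same size bookkeeping is available directly via Flux.outputsize, which
# propagates shapes through the layers without running real parameters:
@show Flux.outputsize(lenet2, (28, 28, 1, 1));  # (10, 1)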

#===== LOADING =====#

# During training, the code above saves the model state to disk. Load the last version:

loaded_state = JLD2.load(filename, "lenet_state");

# Now you would normally re-create the model, and copy all parameters into that.
# We can use lenet2 from just above:

Flux.loadmodel!(lenet2, loaded_state)

# Check that it now agrees with the earlier, trained, model:

@show lenet2(cpu(x1)) ≈ cpu(lenet(x1));

#===== THE END =====#

@@ -0,0 +1,62 @@
#using Turing, MLDatasets

# Function to get datasets
function get_data(name::String)
    if name == "mnist"
        train_data_mnist = MLDatasets.MNIST(; Tx=Float32, split=:train)
        test_data_mnist = MLDatasets.MNIST(; Tx=Float32, split=:test)
        return train_data_mnist, test_data_mnist
    elseif name == "cifar"
        train_data_cifar = MLDatasets.CIFAR10(; Tx=Float32, split=:train)
        test_data_cifar = MLDatasets.CIFAR10(; Tx=Float32, split=:test)
        return train_data_cifar, test_data_cifar
    else
        error("\"$name\" is not a valid dataset; use \"mnist\" or \"cifar\"")
    end
end

function loader(data::MNIST=train_data; batchsize::Int=64)
    x4dim = reshape(data.features, 28, 28, 1, :)  # insert trivial channel dim
    yhot = Flux.onehotbatch(data.targets, 0:9)    # make a 10×60000 OneHotMatrix
    Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true)  #|> gpu
end

# Create a regularization term and a Gaussian prior variance term.
alpha = 0.09;
sig = sqrt(1.0 / alpha);
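
# With alpha = 0.09, every weight and bias gets a Gaussian prior with
# standard deviation 1 / sqrt(0.09) ≈ 3.33.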

# Specify the probabilistic model.
@model function bayes_nn(xs, ys, nparameters, reconstruct)
    # Create the weight and bias vector (isotropic Gaussian prior).
    parameters ~ MvNormal(zeros(nparameters), sig .* ones(nparameters))

    # Construct the NN from the sampled parameters.
    nn = reconstruct(parameters)

    # Forward pass on each batch; softmax converts the raw logits into the
    # class probabilities that Multinomial expects.
    preds = [softmax(nn(x)) for x in xs]

    # Observe each one-hot label column against its predicted probabilities.
    for i in 1:length(ys)
        for j in 1:size(ys[i], 2)
            ys[i][:, j] ~ Multinomial(1, preds[i][:, j])
        end
    end
end;
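
# Usage sketch (matches the main script): draw posterior samples over the
# flattened LeNet weights with HMC.
# ch = sample(bayes_nn(xs, ys, tot_param, reconstruct), HMC(0.05, 4), 5000)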

# A helper to create the NN from weights `theta` and run it through data `x`.
nn_forward(x, theta) = reconstruct(theta)(x)

# Return the average predicted value across multiple sets of weights,
# thinning the chain to every 10th sample up to `num`.
function nn_predict(x, theta, num)
    return mean([nn_forward(x, theta[i, :])[1] for i in 1:10:num])
end;