Added initial files
parent 2f99a87f77
commit ebf272f722
@@ -0,0 +1,163 @@
#=
network_shape = [
    ((5, 5), 1=>6, relu),
    ((2, 2)),
    ((5, 5), 6=>16, relu),
    ((2, 2)),
    Flux.flatten,
    (256 => 120, relu),
    (120 => 84, relu),
    (84 => 10),
];
=#

#=
lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
)
=#

#########################################################

# Import libraries.
using Turing, Flux, Plots, Random, ReverseDiff, MLDatasets
include("./aux_func.jl")

# Hide sampling progress.
Turing.setprogress!(false);

# Use reverse-mode AD (ReverseDiff), which copes better with the large number
# of parameters in neural networks than forward-mode AD.
Turing.setadbackend(:reversediff)

train_mnist, test_mnist = get_data("mnist")
#train_cifar, test_cifar = get_data("cifar")

# Number of points to generate (for the artificial 2D example below).
#N = 80;
#M = round(Int, N / 4);
Random.seed!(1234)


#=
# Generate artificial data.
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt1s = Array([[x1s[i] + 0.5; x2s[i] + 0.5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt1s, Array([[x1s[i] - 5; x2s[i] - 5] for i in 1:M]))

x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
xt0s = Array([[x1s[i] + 0.5; x2s[i] - 5] for i in 1:M])
x1s = rand(M) * 4.5;
x2s = rand(M) * 4.5;
append!(xt0s, Array([[x1s[i] - 5; x2s[i] + 0.5] for i in 1:M]))

# Store all the data for later.
xs = [xt1s; xt0s]
ts = [ones(2 * M); zeros(2 * M)]

# Plot data points.
function plot_data()
    x1 = map(e -> e[1], xt1s)
    y1 = map(e -> e[2], xt1s)
    x2 = map(e -> e[1], xt0s)
    y2 = map(e -> e[2], xt0s)

    Plots.scatter(x1, y1; color="red", clim=(0, 1))
    return Plots.scatter!(x2, y2; color="blue", clim=(0, 1))
end

plot_data()
=#


# Construct a neural network using Flux.
lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
)

batches = loader(train_mnist);

# Collect the batched images and one-hot labels into vectors.
xs = [];
ys = [];
for b in batches
    push!(xs, b[1])
    push!(ys, b[2])
end


#x1, y1 = first(loader(train_mnist)); # (28×28×1×64 Array{Float32, 4}, 10×64 OneHotMatrix(::Vector{UInt32}))


# Extract weights and a helper function to reconstruct the NN from weights.
parameters_initial, reconstruct = Flux.destructure(lenet);

tot_param = length(parameters_initial); # number of parameters in the NN
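
# Optional sanity check: `reconstruct` from Flux.destructure should rebuild a
# network whose output matches `lenet` when given the initial flat parameter
# vector (assumes `xs` is non-empty, i.e. at least one batch was collected above).
@assert reconstruct(parameters_initial)(xs[1]) ≈ lenet(xs[1])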

# Perform inference.
N = 5000;
ch = sample(
    bayes_nn(xs, ys, tot_param, reconstruct), HMC(0.05, 4), N
);

# Extract all weight and bias parameters.
theta = MCMCChains.group(ch, :parameters).value;
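
# Posterior-predictive sketch (illustrative; assumes `theta` rows index posterior
# draws and `reconstruct` is the closure from Flux.destructure above): average the
# softmax outputs of networks rebuilt from a thinned set of posterior samples.
using Statistics: mean
xtest_b, ytest_b = first(loader(test_mnist));
posterior_probs = mean(
    softmax(reconstruct(vec(theta[s, :]))(xtest_b)) for s in 1:50:size(theta, 1)
);
@show mean(Flux.onecold(posterior_probs) .== Flux.onecold(ytest_b));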


#=
# The visualisation steps below come from the 2D toy example commented out above:
# they rely on `plot_data` (defined only inside that block) and call the network
# on 2-element inputs, so they do not apply to the MNIST CNN as written.

# Plot the data we have.
plot_data()

# Find the index that provided the highest log posterior in the chain.
_, i = findmax(ch[:lp]);

# Extract the max row value from i.
i = i.I[1];

# Plot the posterior distribution with a contour plot.
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)

# Plot the average prediction.
plot_data()

n_end = 1500;
x1_range = collect(range(-6; stop=6, length=25));
x2_range = collect(range(-6; stop=6, length=25));
Z = [nn_predict([x1, x2], theta, n_end)[1] for x1 in x1_range, x2 in x2_range];
contour!(x1_range, x2_range, Z)

# Number of iterations to plot.
n_end = 500;

anim = @gif for i in 1:n_end
    plot_data()
    Z = [nn_forward([x1, x2], theta[i, :])[1] for x1 in x1_range, x2 in x2_range]
    contour!(x1_range, x2_range, Z; title="Iteration $i", clim=(0, 1))
end every 5
=#

@@ -0,0 +1,222 @@
# Classification of the MNIST dataset using a convolutional network,
# which is a variant of the original LeNet from 1998.

# This example uses a GPU if you have one,
# and demonstrates how to save model state.

using MLDatasets, Flux, JLD2, CUDA  # this will install everything if necessary

folder = "runs"  # sub-directory in which to save
isdir(folder) || mkdir(folder)
filename = joinpath(folder, "lenet.jld2")

#===== DATA =====#

# Calling MLDatasets.MNIST() will download the dataset if necessary,
# and return a struct containing it.
# It takes a few seconds to read from disk each time, so do this once:

train_data = MLDatasets.MNIST()  # i.e. split=:train
test_data = MLDatasets.MNIST(split=:test)

# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
# Flux needs a 4D array, with the 3rd dim for channels -- here trivial, grayscale.
# Combine the reshape needed with other pre-processing:

function loader(data::MNIST=train_data; batchsize::Int=64)
    x4dim = reshape(data.features, 28, 28, 1, :)  # insert trivial channel dim
    yhot = Flux.onehotbatch(data.targets, 0:9)    # make a 10×60000 OneHotMatrix
    Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true) |> gpu
end

loader()  # returns a DataLoader, with first element a tuple like this:

x1, y1 = first(loader());  # (28×28×1×64 Array{Float32, 4}, 10×64 OneHotMatrix(::Vector{UInt32}))

# If you are using a GPU, these should be CuArray{Float32, 4} etc.
# If not, the `gpu` function does nothing (except complain the first time).
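
# A quick way to check where the data ended up (illustrative only):
@show typeof(x1) typeof(y1);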

#===== MODEL =====#

# LeNet has two convolutional layers, and our modern version has relu nonlinearities.
# After each conv layer there's a pooling step. Finally, there are some fully connected layers:

lenet = Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), 6=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(256 => 120, relu),
    Dense(120 => 84, relu),
    Dense(84 => 10),
) |> gpu

# Notice that most of the parameters are in the final Dense layers.
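
# To see this, count the parameters per layer (a sketch; Flux.params collects each
# layer's trainable arrays, and layers with no parameters contribute zero):
[sum(length, Flux.params(layer); init=0) for layer in lenet.layers]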

y1hat = lenet(x1)  # try it out

sum(softmax(y1hat); dims=1)

# Each column of softmax(y1hat) may be thought of as the network's probabilities
# that an input image is in each of 10 classes. To find its most likely answer,
# we can look for the largest output in each column, without needing softmax first.
# At the moment, these don't resemble the true values at all:

@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

#===== METRICS =====#

# We're going to log accuracy and loss during training. There's no advantage to
# calculating these on minibatches, since MNIST is small enough to do it at once.

using Statistics: mean  # standard library

function loss_and_accuracy(model, data::MNIST=test_data)
    (x, y) = only(loader(data; batchsize=length(data)))  # make one big batch
    ŷ = model(x)
    loss = Flux.logitcrossentropy(ŷ, y)  # did not include softmax in the model
    acc = round(100 * mean(Flux.onecold(ŷ) .== Flux.onecold(y)); digits=2)
    (; loss, acc, split=data.split)  # return a NamedTuple
end

@show loss_and_accuracy(lenet);  # accuracy about 10%, before training

#===== TRAINING =====#

# Let's collect some hyper-parameters in a NamedTuple, just to write them in one place.
# Global variables are fine -- we won't access this from inside any fast loops.

settings = (;
    eta = 3e-4,     # learning rate
    lambda = 1e-2,  # for weight decay
    batchsize = 128,
    epochs = 10,
)
train_log = []

# Initialise the storage needed for the optimiser:

opt_rule = OptimiserChain(WeightDecay(settings.lambda), Adam(settings.eta))
opt_state = Flux.setup(opt_rule, lenet);

for epoch in 1:settings.epochs
    # @time will show a much longer time for the first epoch, due to compilation
    @time for (x, y) in loader(batchsize=settings.batchsize)
        grads = Flux.gradient(m -> Flux.logitcrossentropy(m(x), y), lenet)
        Flux.update!(opt_state, lenet, grads[1])
    end

    # Logging & saving, but not on every epoch
    if epoch % 2 == 1
        loss, acc, _ = loss_and_accuracy(lenet)
        test_loss, test_acc, _ = loss_and_accuracy(lenet, test_data)
        @info "logging:" epoch acc test_acc
        nt = (; epoch, loss, acc, test_loss, test_acc)  # make a NamedTuple
        push!(train_log, nt)
    end
    if epoch % 5 == 0
        JLD2.jldsave(filename; lenet_state = Flux.state(lenet) |> cpu)
        println("saved to ", filename, " after ", epoch, " epochs")
    end
end

@show train_log;

# We can re-run the quick sanity-check of predictions:
y1hat = lenet(x1)
@show hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

#===== INSPECTION =====#

using ImageCore, ImageInTerminal

xtest, ytest = only(loader(test_data, batchsize=length(test_data)));

# There are many ways to look at images; you won't need ImageInTerminal if working in a notebook.
# ImageCore.Gray is a special type, which interprets numbers between 0.0 and 1.0 as shades:

xtest[:,:,1,5] .|> Gray |> transpose |> cpu

Flux.onecold(ytest, 0:9)[5]  # true label, should match!

# Let's look for the image whose classification is least certain.
# First, in each column of probabilities, ask for the largest one.
# Then, over all images, ask for the lowest such probability, and its index.

ptest = softmax(lenet(xtest))
max_p = maximum(ptest; dims=1)
_, i = findmin(vec(max_p))

xtest[:,:,1,i] .|> Gray |> transpose |> cpu

Flux.onecold(ytest, 0:9)[i]   # true classification
ptest[:,i]                    # probabilities of all outcomes
Flux.onecold(ptest[:,i], 0:9) # uncertain prediction

#===== ARRAY SIZES =====#

# A layer like Conv((5, 5), 1=>6) takes 5x5 patches of an image, and matches them to each
# of 6 different 5x5 filters, placed at every possible position. These filters are here:

Conv((5, 5), 1=>6).weight |> summary  # 5×5×1×6 Array{Float32, 4}

# This layer can accept any size of image; let's trace the sizes with the actual input:

#=

julia> x1 |> size
(28, 28, 1, 64)

julia> lenet[1](x1) |> size  # after Conv((5, 5), 1=>6, relu)
(24, 24, 6, 64)

julia> lenet[1:2](x1) |> size  # after MaxPool((2, 2))
(12, 12, 6, 64)

julia> lenet[1:3](x1) |> size  # after Conv((5, 5), 6 => 16, relu)
(8, 8, 16, 64)

julia> lenet[1:4](x1) |> size  # after MaxPool((2, 2))
(4, 4, 16, 64)

julia> lenet[1:5](x1) |> size  # after Flux.flatten
(256, 64)

=#
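
# The same bookkeeping can be done without running real data, via Flux.outputsize
# (a sketch; a CPU copy of the model is used so it works with or without a GPU):
Flux.outputsize(cpu(lenet)[1:5], (28, 28, 1, 64))  # (256, 64), matching the trace above
Flux.outputsize(cpu(lenet), (28, 28, 1, 64))       # (10, 64)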

# Flux.flatten is just reshape, preserving the batch dimension (64) while combining the others (4*4*16).
# This 256 must match the Dense(256 => 120). Here is how to automate this, with Flux.@autosize
# (which works the sizes out for you, using Flux.outputsize under the hood):

lenet2 = Flux.@autosize (28, 28, 1, 1) Chain(
    Conv((5, 5), 1=>6, relu),
    MaxPool((2, 2)),
    Conv((5, 5), _=>16, relu),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(_ => 120, relu),
    Dense(_ => 84, relu),
    Dense(_ => 10),
)

# Check that this indeed accepts input the same size as above:

@show lenet2(cpu(x1)) |> size;

#===== LOADING =====#

# During training, the code above saves the model state to disk. Load the last version:

loaded_state = JLD2.load(filename, "lenet_state");

# Now you would normally re-create the model, and copy all parameters into that.
# We can use lenet2 from just above:

Flux.loadmodel!(lenet2, loaded_state)

# Check that it now agrees with the earlier, trained, model:

@show lenet2(cpu(x1)) ≈ cpu(lenet(x1));

#===== THE END =====#

@@ -0,0 +1,62 @@
#using Turing, MLDatasets

# Function to get datasets.
function get_data(name::String)
    if name == "mnist"
        train_data_mnist = MLDatasets.MNIST(; Tx=Float32, split=:train)
        test_data_mnist = MLDatasets.MNIST(; Tx=Float32, split=:test)
        return train_data_mnist, test_data_mnist
    elseif name == "cifar"
        train_data_cifar = MLDatasets.CIFAR10(; Tx=Float32, split=:train)
        test_data_cifar = MLDatasets.CIFAR10(; Tx=Float32, split=:test)
        return train_data_cifar, test_data_cifar
    else
        error("That is not a valid dataset: ", name)
    end
end

# Batch a dataset into (image, one-hot label) pairs for Flux.
function loader(data::MNIST=train_data; batchsize::Int=64)
    x4dim = reshape(data.features, 28, 28, 1, :)  # insert trivial channel dim
    yhot = Flux.onehotbatch(data.targets, 0:9)    # make a 10×60000 OneHotMatrix
    Flux.DataLoader((x4dim, yhot); batchsize, shuffle=true) #|> gpu
end

# Regularization strength alpha, and the corresponding Gaussian prior standard deviation.
alpha = 0.09;
sig = sqrt(1.0 / alpha);

# Specify the probabilistic model.
@model function bayes_nn(xs, ys, nparameters, reconstruct)
    # Prior on the flattened weight and bias vector.
    parameters ~ MvNormal(zeros(nparameters), sig .* ones(nparameters))

    # Construct the NN from the sampled parameters.
    nn = reconstruct(parameters)

    # Forward pass on each batch; softmax so that each column is a valid
    # probability vector for the Multinomial observations below.
    preds = []
    for x in xs
        push!(preds, softmax(nn(x)))
    end

    # Observe each batch of one-hot labels.
    for i in 1:length(ys)
        ys[i] ~ Multinomial(1, preds[i])
    end
end;

# A helper to create the NN from weights `theta` and run it through data `x`.
nn_forward(x, theta) = reconstruct(theta)(x)

# Return the average predicted value across multiple posterior weight samples.
function nn_predict(x, theta, num)
    return mean([nn_forward(x, theta[i, :])[1] for i in 1:10:num])
end;