Testing scripts for GreenFlux
This commit is contained in:
parent
6f9bff8947
commit
973b17a4e6
|
@ -0,0 +1 @@
|
||||||
|
runs/*
|
|
@ -0,0 +1,64 @@
|
||||||
|
using Statistics
|
||||||
|
|
||||||
|
# External commands whose textual output is parsed by this script.
gpucommand = `nvidia-smi`
cpucommand = `powerstat -R -n -d0`
ramcommand = `free`

# Run nvidia-smi and break its report into individual lines.
gpu = read(gpucommand, String)
gpu = split(gpu, "\n")

# Tokenise every line on single spaces; `keepempty=false` discards the empty
# strings produced by runs of padding spaces. The comprehension yields a
# concretely typed vector instead of an `Array{Any}` filled by index.
smis = [split(line, " "; keepempty=false) for line in gpu]

# Keep only the rows that have more than five tokens and a "/" as the sixth
# one. The code further down reads tokens 5 and 7 of these rows as the power
# draw and the power cap, so "/" is the separator between those two values.
gpus = filter(tokens -> length(tokens) > 5 && tokens[6] == "/", smis)
|
||||||
|
|
||||||
|
#test = Array{Any}[]
|
||||||
|
#for strings in smis
|
||||||
|
# if length(strings) > 1
|
||||||
|
# println("$(typeof(string[2])) : $(strings)")
|
||||||
|
#if occursin(r"^-?(0|([1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?$", strings[2])
|
||||||
|
# append!(test,strings)
|
||||||
|
#end
|
||||||
|
# end
|
||||||
|
#end
|
||||||
|
|
||||||
|
nogpus = length(gpus)

# Convert one power token of an nvidia-smi row into a Float32 number of watts.
# "N/A" (value not reported) counts as 0. Any trailing "W" unit suffix, as in
# "15W", is stripped first, because `parse(Float32, "15W")` throws an
# ArgumentError.
function parsewatts(token)
    token == "N/A" && return 0.0f0
    return parse(Float32, rstrip(token, 'W'))
end

# Token 5 of each GPU row is the current draw, token 7 the configured cap
# (token 6 is the "/" between them).
powerdraw = Float32[parsewatts(row[5]) for row in gpus]
powercap = Float32[parsewatts(row[7]) for row in gpus]

# NOTE: with no GPU rows both vectors are empty and the means are NaN.
pwavg = mean(powerdraw)
cpavg = mean(powercap)

println("The power average draw of the GPU's is $(pwavg) / $(cpavg) Watts.")
|
||||||
|
|
||||||
|
# Run powerstat and break its report into individual lines.
cpu = read(cpucommand, String)
cpu = split(cpu, "\n")

# Locate the summary row by its "Average" label rather than by a fixed line
# number and character range, which only holds for one exact sample count and
# column layout.
# NOTE(review): assumes the Watts value is the last column of that row, as in
# powerstat's default layout — confirm if extra column options are ever added.
avgrow = findfirst(line -> startswith(lstrip(line), "Average"), cpu)
avgrow === nothing && error("no \"Average\" row found in the powerstat output")
cpu = last(split(cpu[avgrow]))

println("The average power draw of the CPU is $cpu Watts.")
|
||||||
|
|
||||||
|
# Capture the report printed by `free`, take its second line and split that
# line into its non-empty, space-separated fields.
ram = split(read(ramcommand, String), "\n")
ram = split(ram[2], " "; keepempty=false)

# Field 2 is read as the total amount of memory, field 3 as the amount in use.
totalram = parse(Float32, ram[2])
usedram = parse(Float32, ram[3])

# The used/total ratio is scaled by two fixed factors to estimate the draw.
# NOTE(review): the origin of 1.575 and 1.904 is not documented here — confirm.
println("The amount of power draw of the RAM is ram ", ((usedram*1.575)/totalram)*1.904, " Watts.")
|
|
@ -0,0 +1,174 @@
|
||||||
|
## Classification of MNIST dataset
|
||||||
|
## with the convolutional neural network known as LeNet5.
|
||||||
|
## This script also combines various
|
||||||
|
## packages from the Julia ecosystem with Flux.
|
||||||
|
using Flux
|
||||||
|
using Flux.Data: DataLoader
|
||||||
|
using Flux.Optimise: Optimiser, WeightDecay
|
||||||
|
using Flux: onehotbatch, onecold, logitcrossentropy
|
||||||
|
using Statistics, Random
|
||||||
|
using Parameters: @with_kw
|
||||||
|
using Logging: with_logger, global_logger
|
||||||
|
using TensorBoardLogger: TBLogger, tb_overwrite, set_step!, set_step_increment!
|
||||||
|
import ProgressMeter
|
||||||
|
import MLDatasets
|
||||||
|
import DrWatson: savename, struct2dict
|
||||||
|
import BSON
|
||||||
|
using CUDAapi
|
||||||
|
|
||||||
|
"""
    LeNet5(; imgsize=(28,28,1), nclasses=10)

Build a LeNet5 network as a Flux `Chain`.

The model can be adapted to any image size and number of output classes.

# Keywords
- `imgsize`: two spatial sizes followed by the number of input channels.
- `nclasses`: width of the final `Dense` layer.
"""
function LeNet5(; imgsize=(28,28,1), nclasses=10)
    # Number of values per sample that reach the first Dense layer: each
    # spatial size is reduced to n÷4 - 3 and the last Conv emits 16 channels.
    nfeatures = (imgsize[1]÷4 - 3) * (imgsize[2]÷4 - 3) * 16

    # Layers are listed (and therefore constructed) in forward order.
    layers = (
        # give the input an explicit trailing batch dimension
        x -> reshape(x, imgsize..., :),
        Conv((5, 5), imgsize[end]=>6, relu),
        MaxPool((2, 2)),
        Conv((5, 5), 6=>16, relu),
        MaxPool((2, 2)),
        # collapse everything but the batch dimension (dimension 4)
        x -> reshape(x, :, size(x, 4)),
        Dense(nfeatures, 120, relu),
        Dense(120, 84, relu),
        Dense(84, nclasses),
    )
    return Chain(layers...)
end
|
||||||
|
|
||||||
|
"""
    get_data(args)

Load MNIST from `args.datapath` and return `(train_loader, test_loader)`.

Images are reshaped to `28×28×1×N` and labels are one-hot encoded over `0:9`.
Both loaders use `args.batchsize`; only the training loader shuffles.
"""
function get_data(args)
    mnist = MLDatasets.MNIST
    train_x, train_y = mnist.traindata(Float32, dir=args.datapath)
    test_x, test_y = mnist.testdata(Float32, dir=args.datapath)

    # add the singleton channel dimension expected by the Conv layers
    as_images(x) = reshape(x, 28, 28, 1, :)
    as_onehot(labels) = onehotbatch(labels, 0:9)

    train_loader = DataLoader(as_images(train_x), as_onehot(train_y),
                              batchsize=args.batchsize, shuffle=true)
    test_loader = DataLoader(as_images(test_x), as_onehot(test_y),
                             batchsize=args.batchsize)

    return train_loader, test_loader
end
|
||||||
|
|
||||||
|
"""
    loss(ŷ, y)

Return `logitcrossentropy(ŷ, y)` for model outputs `ŷ` and targets `y`.
"""
function loss(ŷ, y)
    return logitcrossentropy(ŷ, y)
end
|
||||||
|
|
||||||
|
"""
    eval_loss_accuracy(loader, model, device)

Evaluate `model` on every batch of `loader` after moving the batch to `device`.

Return a named tuple `(loss, acc)`: the per-sample mean of `loss` and the
accuracy in percent, both rounded with `round4`.
"""
function eval_loss_accuracy(loader, model, device)
    total_loss = 0f0
    ncorrect = 0
    nseen = 0
    for (x, y) in loader
        x, y = device(x), device(y)
        ŷ = model(x)
        batch = size(x)[end]
        # weight the batch loss by the batch size so the final division
        # gives a per-sample average
        total_loss += loss(ŷ, y) * batch
        # predictions and targets are compared on the CPU
        ncorrect += count(onecold(cpu(ŷ)) .== onecold(cpu(y)))
        nseen += batch
    end
    return (loss = round4(total_loss/nseen), acc = round4(ncorrect/nseen*100))
end
|
||||||
|
|
||||||
|
## utility functions
|
||||||
|
|
||||||
|
"""
    num_params(model)

Return the summed `length` of all arrays in `Flux.params(model)`.
"""
function num_params(model)
    return sum(length, Flux.params(model))
end
|
||||||
|
|
||||||
|
"""
    round4(x)

Round `x` to four digits after the decimal point.
"""
function round4(x)
    return round(x; digits=4)
end
|
||||||
|
|
||||||
|
|
||||||
|
# Arguments for the `train` function.
# `train` builds this with `Args(; kws...)`, so every field below can be
# overridden by keyword.
@with_kw mutable struct Args
    # learning rate
    η = 3e-4
    # L2 regularizer param, implemented as weight decay
    λ = 0
    # batch size
    batchsize = 128
    # number of epochs
    epochs = 20
    # set seed > 0 for reproducibility
    seed = 0
    # if true use cuda (if available)
    cuda = true
    # report every `infotime` epochs
    infotime = 1
    # Save the model every `checktime` epochs. Set to 0 for no checkpoints.
    checktime = 5
    # log training with tensorboard
    tblogger = false
    # results path. If nothing, construct a default path from Args. If existing, may overwrite
    savepath = nothing
    # data path: change to your data directory
    datapath = joinpath(homedir(), "Datasets", "MNIST")
end
|
||||||
|
|
||||||
|
"""
    train(; kws...)

Train LeNet5 on MNIST.

All keyword arguments are forwarded to `Args`; see that struct for the
available options and their defaults. The function

1. seeds the RNG when `seed > 0` and picks the GPU when `cuda` is set and a
   CUDA GPU is available, otherwise the CPU,
2. builds the data loaders, the model and an ADAM optimiser (wrapped with
   `WeightDecay` when `λ > 0`),
3. reports train/test loss and accuracy before training and every `infotime`
   epochs, optionally also to TensorBoard,
4. saves the model, epoch and arguments to `model.bson` under `savepath`
   every `checktime` epochs (never when `checktime` is 0).

Returns `nothing`.
"""
function train(; kws...)
    args = Args(; kws...)
    args.seed > 0 && Random.seed!(args.seed)
    use_cuda = args.cuda && CUDAapi.has_cuda_gpu()
    if use_cuda
        device = gpu
        @info "Training on GPU"
    else
        device = cpu
        @info "Training on CPU"
    end

    ## DATA
    train_loader, test_loader = get_data(args)
    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"

    ## MODEL AND OPTIMIZER
    model = LeNet5() |> device
    @info "LeNet5 model: $(num_params(model)) trainable params"

    ps = Flux.params(model)

    opt = ADAM(args.η)
    if args.λ > 0
        opt = Optimiser(opt, WeightDecay(args.λ))
    end

    ## LOGGING UTILITIES
    # `===` is the identity test for `nothing`; `==` can be overloaded by the
    # value stored in `savepath` and is not guaranteed to return a Bool.
    if args.savepath === nothing
        experiment_folder = savename("lenet", args, scientific=4,
                    accesses=[:batchsize, :η, :seed, :λ]) # construct path from these fields
        args.savepath = joinpath("runs", experiment_folder)
    end
    if args.tblogger
        tblogger = TBLogger(args.savepath, tb_overwrite)
        set_step_increment!(tblogger, 0) # 0 auto increment since we manually set_step!
        @info "TensorBoard logging at \"$(args.savepath)\""
    end

    # Evaluate on both loaders, print the metrics and, when enabled, log them
    # to TensorBoard at step `epoch`.
    function report(epoch)
        train = eval_loss_accuracy(train_loader, model, device)
        test = eval_loss_accuracy(test_loader, model, device)
        println("Epoch: $epoch   Train: $(train)   Test: $(test)")
        if args.tblogger
            set_step!(tblogger, epoch)
            with_logger(tblogger) do
                @info "train" loss=train.loss acc=train.acc
                @info "test" loss=test.loss acc=test.acc
            end
        end
    end

    ## TRAINING
    @info "Start Training"
    report(0)
    for epoch in 1:args.epochs
        p = ProgressMeter.Progress(length(train_loader))

        for (x, y) in train_loader
            x, y = x |> device, y |> device
            gs = Flux.gradient(ps) do
                ŷ = model(x)
                loss(ŷ, y)
            end
            Flux.Optimise.update!(opt, ps, gs)
            ProgressMeter.next!(p) # comment out for no progress bar
        end

        epoch % args.infotime == 0 && report(epoch)
        if args.checktime > 0 && epoch % args.checktime == 0
            !ispath(args.savepath) && mkpath(args.savepath)
            modelpath = joinpath(args.savepath, "model.bson")
            # save a CPU copy of the model and a plain Dict of the arguments
            let model=cpu(model), args=struct2dict(args)
                BSON.@save modelpath model epoch args
            end
            @info "Model saved in \"$(modelpath)\""
        end
    end
end
|
||||||
|
|
||||||
|
## Execution as a script
# Start training only when this file is the program passed to `julia`,
# i.e. when PROGRAM_FILE resolves to this very file.
if (@__FILE__) == abspath(PROGRAM_FILE)
    train()
end
|
Loading…
Reference in New Issue