Testing scripts for GreenFlux

2020-07-17 08:45:59 -06:00 · 2020-07-17 08:45:59 -06:00 · 973b17a4e6
commit 973b17a4e6
parent 6f9bff8947
3 changed files with 239 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+runs/*
--- a/laptoppower.jl
+++ b/laptoppower.jl
@ -0,0 +1,64 @@
+using Statistics
+
+# External commands sampled by this script. They are only constructed here; each
+# one is executed later through `read`.
+gpucommand = `nvidia-smi`
+cpucommand = `powerstat -R -n -d0`
+ramcommand = `free`
+
+# Parse one power field taken from an `nvidia-smi` table row, e.g. "35W" or "N/A".
+# A trailing "W" unit is stripped before parsing (plain `parse` would throw on it);
+# anything that still is not a number, such as "N/A", is reported as 0.0.
+function parsewatts(field)
+    value = tryparse(Float32, rstrip(field, 'W'))
+    return value === nothing ? 0.0f0 : value
+end
+
+gpu = read(gpucommand, String);
+gpu = split(gpu, "\n")
+# Tokenise every output line on spaces, dropping the empty tokens produced by
+# runs of consecutive spaces.
+smis = [split(line, " "; keepempty=false) for line in gpu]
+# Keep only the rows whose 6th token is "/". Tokens 5 and 7 of such a row are read
+# below as power draw and power cap, so at least 7 tokens are required.
+gpus = [tokens for tokens in smis if length(tokens) >= 7 && tokens[6] == "/"]
+
+nogpus = length(gpus)
+
+powerdraw = Float32[parsewatts(tokens[5]) for tokens in gpus]
+powercap = Float32[parsewatts(tokens[7]) for tokens in gpus]
+
+# NOTE: when no row matched, both vectors are empty and `mean` yields NaN.
+pwavg = mean(powerdraw)
+cpavg = mean(powercap)
+
+println("The power average draw of the GPU's is $(pwavg) / $(cpavg) Watts.")
+
+# Run powerstat and report the wattage from its "Average" summary row.
+cpu = read(cpucommand, String);
+cpu = split(cpu, "\n")
+# Locate the summary row by its label instead of a fixed line number and column
+# range, so a different sample count or column layout does not silently pick the
+# wrong text.
+# NOTE(review): assumes the wattage is the last field of the "Average" row —
+# confirm against the installed powerstat version.
+avgrow = findfirst(line -> startswith(lstrip(line), "Average"), cpu)
+avgrow === nothing && error("no \"Average\" row found in powerstat output")
+cpu = last(split(cpu[avgrow]))
+
+println("The average power draw of the CPU is $cpu Watts.")
+
+# Estimate the RAM power draw from the memory figures printed by `free`.
+ram = read(ramcommand, String);
+ram = split(ram, "\n")
+# Second output line, tokenised on spaces with empty tokens dropped; tokens 2 and
+# 3 are read as total and used memory respectively.
+ram = split(ram[2], " "; keepempty=false)
+usedram = parse(Float32, ram[3])
+totalram = parse(Float32, ram[2])
+
+# Used fraction scaled by the two fixed factors, evaluated in the original order.
+ramwatts = ((usedram * 1.575) / totalram) * 1.904
+println("The amount of power draw of the RAM is ram $(ramwatts) Watts.")
--- a/lenet.jl
+++ b/lenet.jl
@ -0,0 +1,174 @@
+## Classification of the MNIST dataset
+## with the convolutional neural network known as LeNet5.
+## This script also combines various
+## packages from the Julia ecosystem with Flux.
+using Flux
+using Flux.Data: DataLoader
+using Flux.Optimise: Optimiser, WeightDecay
+using Flux: onehotbatch, onecold, logitcrossentropy
+using Statistics, Random
+using Parameters: @with_kw
+using Logging: with_logger, global_logger
+using TensorBoardLogger: TBLogger, tb_overwrite, set_step!, set_step_increment!
+import ProgressMeter
+import MLDatasets
+import DrWatson: savename, struct2dict
+import BSON
+using CUDAapi
+
+# Build a LeNet5 network as a Flux `Chain`.
+# `imgsize` is (height, width, channels) and `nclasses` is the width of the
+# final Dense layer, so other image sizes and class counts can be used.
+function LeNet5(; imgsize=(28,28,1), nclasses=10)
+    # Spatial extent left after the two conv(5x5) + maxpool(2x2) stages, times
+    # the 16 feature maps produced by the second convolution.
+    nfeatures = (imgsize[1] ÷ 4 - 3) * (imgsize[2] ÷ 4 - 3) * 16
+    nchannels = imgsize[end]
+
+    # Restore the image layout, keeping the batch as the trailing dimension.
+    as_images = x -> reshape(x, imgsize..., :)
+    # Collapse everything except the batch dimension before the dense layers.
+    as_vectors = x -> reshape(x, :, size(x, 4))
+
+    return Chain(
+        as_images,
+        Conv((5, 5), nchannels=>6, relu),
+        MaxPool((2, 2)),
+        Conv((5, 5), 6=>16, relu),
+        MaxPool((2, 2)),
+        as_vectors,
+        Dense(nfeatures, 120, relu),
+        Dense(120, 84, relu),
+        Dense(84, nclasses),
+    )
+end
+
+# Load MNIST from `args.datapath` and return `(train_loader, test_loader)`,
+# both batched with `args.batchsize`; only the training loader shuffles.
+function get_data(args)
+    classes = 0:9
+    # Add an explicit channel dimension to the images and one-hot encode the labels.
+    prepare(images, labels) = (reshape(images, 28, 28, 1, :), onehotbatch(labels, classes))
+
+    train_x, train_y = prepare(MLDatasets.MNIST.traindata(Float32, dir=args.datapath)...)
+    test_x, test_y = prepare(MLDatasets.MNIST.testdata(Float32, dir=args.datapath)...)
+
+    return (
+        DataLoader(train_x, train_y, batchsize=args.batchsize, shuffle=true),
+        DataLoader(test_x, test_y, batchsize=args.batchsize),
+    )
+end
+
+# Training objective: logit cross-entropy between model outputs `ŷ` and targets `y`.
+function loss(ŷ, y)
+    return logitcrossentropy(ŷ, y)
+end
+
+# Evaluate `model` over every batch of `loader` after moving the batch with
+# `device`. Returns a named tuple `(loss, acc)`: the per-example mean loss and the
+# accuracy in percent, both rounded with `round4`.
+function eval_loss_accuracy(loader, model, device)
+    total_loss = 0f0
+    ncorrect = 0
+    nseen = 0
+    for (x, y) in loader
+        x = device(x)
+        y = device(y)
+        nbatch = size(x)[end]
+        ŷ = model(x)
+        # `loss` is applied per batch, so weight it by the batch size before summing.
+        total_loss += loss(ŷ, y) * nbatch
+        # Predictions and targets are compared on the CPU.
+        ncorrect += sum(onecold(cpu(ŷ)) .== onecold(cpu(y)))
+        nseen += nbatch
+    end
+    return (loss = round4(total_loss / nseen), acc = round4(ncorrect / nseen * 100))
+end
+
+## utility functions
+
+# Total number of scalar entries across all parameter arrays of `model`.
+function num_params(model)
+    return sum(length, Flux.params(model))
+end
+
+# Round `x` to four decimal digits.
+function round4(x)
+    return round(x; digits=4)
+end
+
+
+# Arguments for the `train` function.
+# `train` builds this struct from its keyword arguments, so every field below can
+# be overridden by name. The struct is mutable because `train` fills in
+# `savepath` when it is left as `nothing`.
+@with_kw mutable struct Args
+    η = 3e-4             # learning rate passed to ADAM
+    λ = 0                # L2 regularizer param, implemented as weight decay; only applied when > 0
+    batchsize = 128      # batch size used for both the train and test loaders
+    epochs = 20          # number of epochs
+    seed = 0             # set seed > 0 for reproducibility; the RNG is seeded only in that case
+    cuda = true          # if true use cuda (if available)
+    infotime = 1         # report train/test metrics every `infotime` epochs
+    checktime = 5        # Save the model every `checktime` epochs. Set to 0 for no checkpoints.
+    tblogger = false      # if true, also log metrics with TensorBoard under `savepath`
+    savepath = nothing    # results path. If nothing, construct a default path from Args. If existing, may overwrite
+    datapath = joinpath(homedir(), "Datasets", "MNIST") # data path: change to your data directory 
+end
+
+# Train LeNet5 on MNIST.
+#
+# All keyword arguments are forwarded to the `Args` constructor. Metrics are
+# printed (and optionally sent to TensorBoard) every `args.infotime` epochs, and
+# the model is saved to `<savepath>/model.bson` every `args.checktime` epochs.
+# Setting either interval to 0 disables the corresponding step.
+function train(; kws...)
+    args = Args(; kws...)
+    args.seed > 0 && Random.seed!(args.seed)
+    use_cuda = args.cuda && CUDAapi.has_cuda_gpu()
+    if use_cuda
+        device = gpu
+        @info "Training on GPU"
+    else
+        device = cpu
+        @info "Training on CPU"
+    end
+
+    ## DATA
+    train_loader, test_loader = get_data(args)
+    @info "Dataset MNIST: $(train_loader.nobs) train and $(test_loader.nobs) test examples"
+
+    ## MODEL AND OPTIMIZER
+    model = LeNet5() |> device
+    @info "LeNet5 model: $(num_params(model)) trainable params"
+
+    ps = Flux.params(model)
+
+    opt = ADAM(args.η)
+    if args.λ > 0
+        opt = Optimiser(opt, WeightDecay(args.λ))
+    end
+
+    ## LOGGING UTILITIES
+    # Identity comparison is the reliable way to test for `nothing`.
+    if args.savepath === nothing
+        experiment_folder = savename("lenet", args, scientific=4,
+                    accesses=[:batchsize, :η, :seed, :λ]) # construct path from these fields
+        args.savepath = joinpath("runs", experiment_folder)
+    end
+    if args.tblogger
+        tblogger = TBLogger(args.savepath, tb_overwrite)
+        set_step_increment!(tblogger, 0) # 0 auto increment since we manually set_step!
+        @info "TensorBoard logging at \"$(args.savepath)\""
+    end
+
+    # Print train/test metrics for `epoch` and, when enabled, log them to TensorBoard.
+    # The locals deliberately avoid the names `train`/`test` so they do not shadow
+    # the enclosing function.
+    function report(epoch)
+        trainstats = eval_loss_accuracy(train_loader, model, device)
+        teststats = eval_loss_accuracy(test_loader, model, device)
+        println("Epoch: $epoch   Train: $(trainstats)   Test: $(teststats)")
+        if args.tblogger
+            set_step!(tblogger, epoch)
+            with_logger(tblogger) do
+                @info "train" loss=trainstats.loss  acc=trainstats.acc
+                @info "test"  loss=teststats.loss   acc=teststats.acc
+            end
+        end
+    end
+
+    ## TRAINING
+    @info "Start Training"
+    report(0)
+    for epoch in 1:args.epochs
+        p = ProgressMeter.Progress(length(train_loader))
+
+        for (x, y) in train_loader
+            x, y = x |> device, y |> device
+            gs = Flux.gradient(ps) do
+                ŷ = model(x)
+                loss(ŷ, y)
+            end
+            Flux.Optimise.update!(opt, ps, gs)
+            ProgressMeter.next!(p)   # comment out for no progress bar
+        end
+
+        # Guard against `infotime == 0`, which would otherwise raise a DivideError
+        # in the remainder; 0 now simply disables periodic reports.
+        args.infotime > 0 && epoch % args.infotime == 0 && report(epoch)
+        if args.checktime > 0 && epoch % args.checktime == 0
+            !ispath(args.savepath) && mkpath(args.savepath)
+            modelpath = joinpath(args.savepath, "model.bson")
+            # Save a CPU copy of the model and a Dict view of the arguments; the
+            # local names determine the keys written by BSON.@save.
+            let model=cpu(model), args=struct2dict(args)
+                BSON.@save modelpath model epoch args
+            end
+            @info "Model saved in \"$(modelpath)\""
+        end
+    end
+end
+
+## Execution as a script
+# Start training only when this file is the program being run, not when it is included.
+(@__FILE__) == abspath(PROGRAM_FILE) && train()