Compare commits

..

1 Commits

Author SHA1 Message Date
Eduardo Cueto-Mendoza 931cb79e42 Commited code from 2024 2024-05-10 11:04:28 +01:00
19 changed files with 365 additions and 903 deletions

7
.gitignore vendored
View File

@ -1,22 +1,21 @@
**/**/__pycache__/
**/**/__init__.py
**/__pycache__/
**/__init__.py
checkpoints/
__pycache__/
data_budget/
bayes_*
times_*
data/cifar-10-batches-py/
data/MNIST/
.vscode
freq_*
data/
.idea
*.pkl
*.txt
*.tar.gz
stp
sav
bay
frq
sav
tmp
*_DATA

View File

@ -1,4 +1,4 @@
Copyright (c) 2024 TastyPancakes.
Copyright (c) 2024 Eduardo Cueto-Mendoza.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

View File

@ -1,3 +1,7 @@
# Energy efficiency comparison
This experiment compares a Frequentist CNN model against a Bayesian CNN model
This experiment compares a Frequentist CNN model against a Bayesian CNN model
## Example run command
python run_service.py -f1 -s -e && sleep 60 && python run_service.py -f2 -s -e && sleep 60 && python run_service.py -f3 -s -e && sleep 60 && python run_service.py -f4 -s -e && sleep 60 && python run_service.py -f5 -s -e && sleep 60 && python run_service.py -f6 -s -e && sleep 60 && python run_service.py -f7 -s -e && sleep 60 && python run_service.py -b1 -s -e && sleep 60 && python run_service.py -b2 -s -e && sleep 60 && python run_service.py -b3 -s -e && sleep 60 && python run_service.py -b4 -s -e && sleep 60 && python run_service.py -b5 -s -e && sleep 60 && python run_service.py -b6 -s -e && sleep 60 && python run_service.py -b7 -s -e && sleep 60

View File

@ -1,9 +1,8 @@
import pickle
from warnings import warn
from gpu_power_func import get_sample_of_gpu
with open("configuration.pkl", "rb") as file:
with (open("configuration.pkl", "rb")) as file:
while True:
try:
cfg = pickle.load(file)
@ -17,14 +16,14 @@ with open("configuration.pkl", "rb") as file:
# print(cfg)
if __name__ == "__main__":
if __name__ == '__main__':
dataDump = []
while True:
try:
dataDump.append(get_sample_of_gpu())
with open(cfg["pickle_path"], "wb") as f:
with open(cfg["pickle_path"], 'wb') as f:
pickle.dump(dataDump, f)
except EOFError:
warn("Pickle ran out of space")
warn('Pickle ran out of space')
finally:
f.close()

View File

@ -6,55 +6,22 @@ all_args = argparse.ArgumentParser()
def makeArguments(arguments: ArgumentParser) -> dict:
"""Training arguments to be passed to the model"""
all_args.add_argument(
"-b",
"--Bayesian",
action="store",
dest="b",
type=int,
choices=range(1, 8),
help="Bayesian model of size x",
)
all_args.add_argument(
"-f",
"--Frequentist",
action="store",
dest="f",
type=int,
choices=range(1, 8),
help="Frequentist model of size x",
)
all_args.add_argument(
"-E", "--EarlyStopping", action="store_true", help="Early Stopping criteria"
)
all_args.add_argument(
"-e", "--EnergyBound", action="store_true", help="Energy Bound criteria"
)
all_args.add_argument(
"-a", "--AccuracyBound", action="store_true", help="Accuracy Bound criteria"
)
all_args.add_argument(
"-x",
"--EfficiencyStopping",
action="store_true",
help="Efficiency Stopping criteria",
)
all_args.add_argument("-s", "--Save", action="store_true", help="Save model")
all_args.add_argument(
"--net_type", default="lenet", type=str, help="model = [lenet/AlexNet/3Conv3FC]"
)
all_args.add_argument(
"-N",
"--noise_type",
default=None,
type=str,
help="noise = [Gaussian(m,s)/Raleigh(a,b)/Erlang(a,b)/Exponential(a)/Uniform(a,b)/Impulse(a)]",
)
all_args.add_argument(
"--dataset",
default="CIFAR10",
type=str,
help="dataset = [MNIST/CIFAR10/CIFAR100]",
)
all_args.add_argument("-b", "--Bayesian", action="store", dest="b",
type=int, choices=range(1, 8),
help="Bayesian model of size x")
all_args.add_argument("-f", "--Frequentist", action="store", dest="f",
type=int, choices=range(1, 8),
help="Frequentist model of size x")
all_args.add_argument("-E", "--EarlyStopping", action="store_true",
help="Early Stopping criteria")
all_args.add_argument("-e", "--EnergyBound", action="store_true",
help="Energy Bound criteria")
all_args.add_argument("-a", "--AccuracyBound", action="store_true",
help="Accuracy Bound criteria")
all_args.add_argument("-s", "--Save", action="store_true",
help="Save model")
all_args.add_argument('--net_type', default='lenet', type=str,
help='model = [lenet/AlexNet/3Conv3FC]')
all_args.add_argument('--dataset', default='MNIST', type=str,
help='dataset = [MNIST/CIFAR10/CIFAR100]')
return vars(all_args.parse_args())

View File

@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/bin/env bash
powerstat -D -z 0.5 10000000 > $1
#powerstat -z 0.5 1000000 > $1

View File

@ -1 +0,0 @@
from .data import getDataloader, getDataset

View File

@ -1,405 +0,0 @@
import random
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler
class AddNoNoise:
    """Identity transform used when no noise should be applied.

    Accepts the same ``(mean, std)`` constructor arguments as the other
    noise transforms so it can be instantiated interchangeably, but the
    values are only stored, never used.
    """

    def __init__(self, mean=0.0, std=1.0):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """Return ``tensor`` unchanged."""
        return tensor

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "No noise"
class AddGaussianNoise:
    """Transform that adds Gaussian noise with the given mean and std."""

    def __init__(self, mean=0.0, std=1.0):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """Return ``tensor`` plus standard-normal samples scaled by ``std`` and shifted by ``mean``."""
        scaled_noise = torch.randn(tensor.size()) * self.std
        return tensor + scaled_noise + self.mean

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(mean={0}, std={1})".format(self.mean, self.std)
class AddRaleighNoise:
    """Transform that adds normally-distributed noise whose moments are derived from ``(a, b)``.

    The noise itself is drawn with ``torch.randn``; only its scale and shift
    are computed from the two parameters.
    """

    def __init__(self, a=0.0, b=0.0):
        # NOTE(review): this expression has the shape of the Rayleigh
        # *variance*, yet it is used as a standard deviation -- confirm intent.
        self.std = (b * (4 - np.pi)) / 4
        self.mean = a + np.sqrt((np.pi * b) / 4)

    def __call__(self, tensor):
        """Return ``tensor`` plus standard-normal samples scaled by ``std`` and shifted by ``mean``."""
        scaled_noise = torch.randn(tensor.size()) * self.std
        return tensor + scaled_noise + self.mean

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(mean={0}, std={1})".format(self.mean, self.std)
class AddErlangNoise:
    """Transform that adds normally-distributed noise parameterised by ``(a, b)``.

    With ``a == 0.0`` both moments are set to zero.
    """

    def __init__(self, a=0.0, b=0.0):
        if a != 0.0:
            self.std = b / a
            self.mean = b / (2 * a)
        else:
            self.std = 0.0
            self.mean = 0.0

    def __call__(self, tensor):
        """Apply the noise; a zero mean multiplies the input by that zero."""
        if self.mean == 0.0:
            # Zero mean: the result is the input scaled by 0.0.
            return tensor * self.mean
        scaled_noise = torch.randn(tensor.size()) * self.std
        return tensor + scaled_noise + self.mean

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(mean={0}, std={1})".format(self.mean, self.std)
class AddExponentialNoise(object):
    """Transform that adds normally-distributed noise parameterised by a rate ``a``.

    Args:
        a: Rate parameter. ``0.0`` sets both moments to zero.
        b: Unused; accepted so every noise transform can be constructed with
            the same two positional arguments.
    """

    def __init__(self, a=0.0, b=0):
        if a == 0.0:
            # Always define ``std`` as well: previously it was left unset on
            # this branch, so ``repr()`` of a default instance raised
            # AttributeError.
            self.std = 0.0
            self.mean = 0.0
        else:
            self.std = 1 / (2 * a)
            self.mean = 1 / a

    def __call__(self, tensor):
        """Apply the noise; a zero mean multiplies the input by that zero."""
        if self.mean == 0.0:
            return tensor * self.mean
        else:
            return tensor + torch.randn(tensor.size()) * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + "(mean={0}, std={1})".format(
            self.mean, self.std
        )
class AddUniformNoise:
    """Transform that adds normally-distributed noise parameterised by bounds ``(a, b)``.

    With ``a == 0.0`` both moments are set to zero regardless of ``b``.
    """

    def __init__(self, a=0.0, b=0.0):
        if a != 0.0:
            # NOTE(review): (b - a)^2 / 12 has the shape of the uniform
            # *variance* but is used as a standard deviation -- confirm intent.
            self.std = (b - a) ** 2 / 12
            self.mean = (b + a) / 2
        else:
            self.std = 0.0
            self.mean = 0.0

    def __call__(self, tensor):
        """Apply the noise; a zero mean multiplies the input by that zero."""
        if self.mean == 0.0:
            return tensor * self.mean
        noise = torch.randn(tensor.size()) * self.std + self.mean
        return tensor + noise

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(mean={0}, std={1})".format(self.mean, self.std)
class AddImpulseNoise:
    """Transform that rescales the whole input by ``+a``, ``-a`` or ``0``, chosen at random.

    The second constructor argument is accepted and ignored.
    """

    def __init__(self, a=0.0, b=0):
        self.value = a

    def __call__(self, tensor):
        """Multiply ``tensor`` by a randomly selected factor.

        A first Gaussian draw selects ``+value`` when positive; otherwise a
        second, independent draw selects ``-value`` when negative; otherwise
        the factor is ``0.0``.
        """
        if random.gauss(0, 1) > 0:
            factor = self.value
        elif random.gauss(0, 1) < 0:
            factor = -1 * self.value
        else:
            factor = 0.0
        return tensor * factor

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + "(a={0})".format(self.value)
class CustomDataset(Dataset):
    """Dataset over parallel ``data`` / ``labels`` sequences with an optional per-sample transform."""

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        """Number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``; the transform is applied to the sample only."""
        item = self.data[idx]
        target = self.labels[idx]
        if not self.transform:
            return item, target
        return self.transform(item), target
def extract_classes(dataset, classes):
    """Select the samples of ``dataset`` whose target is one of ``classes``.

    Reads ``dataset.targets`` and ``dataset.data`` and returns the pair
    ``(data, targets)`` indexed by a boolean mask of matching targets.
    """
    labels = dataset.targets
    keep = torch.zeros_like(labels, dtype=torch.bool)
    for wanted in classes:
        keep |= labels == wanted
    return dataset.data[keep], labels[keep]
def getDataset(dataset, noise=None, mean=0.0, std=0.0):
    """Build the train/test datasets for ``dataset`` with an optional noise transform.

    Args:
        dataset: One of ``"CIFAR10"``, ``"CIFAR100"``, ``"MNIST"``,
            ``"SplitMNIST-2.1"``, ``"SplitMNIST-2.2"`` or
            ``"SplitMNIST-5.1"`` .. ``"SplitMNIST-5.5"``.
        noise: ``None`` for no noise, or one of ``"gaussian"``, ``"raleigh"``,
            ``"erlang"``, ``"exponential"``, ``"uniform"``, ``"impulse"``.
        mean: First positional argument handed to the noise transform.
        std: Second positional argument handed to the noise transform.

    Returns:
        ``(trainset, testset, inputs, num_classes)`` where ``inputs`` is the
        number of image channels (3 for CIFAR, 1 for MNIST variants).

    Raises:
        ValueError: If ``dataset`` or ``noise`` is not a supported name.
            (Previously an unknown name surfaced later as ``TypeError`` or
            ``UnboundLocalError``.)

    Data is downloaded into ``./data`` when it is not already present.
    """
    # SplitMNIST variants: the digit classes kept by each split. Labels are
    # shifted down by the first class so they start at 0; add that offset
    # back after prediction to recover the original digit.
    split_mnist_classes = {
        "SplitMNIST-2.1": [0, 1, 2, 3, 4],
        "SplitMNIST-2.2": [5, 6, 7, 8, 9],
        "SplitMNIST-5.1": [0, 1],
        "SplitMNIST-5.2": [2, 3],
        "SplitMNIST-5.3": [4, 5],
        "SplitMNIST-5.4": [6, 7],
        "SplitMNIST-5.5": [8, 9],
    }
    known_datasets = ("CIFAR10", "CIFAR100", "MNIST", *split_mnist_classes)
    if dataset not in known_datasets:
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected one of {list(known_datasets)}"
        )

    noise_classes = {
        None: AddNoNoise,
        "gaussian": AddGaussianNoise,
        "raleigh": AddRaleighNoise,
        "erlang": AddErlangNoise,
        "exponential": AddExponentialNoise,
        "uniform": AddUniformNoise,
        "impulse": AddImpulseNoise,
    }
    try:
        noise_type = noise_classes[noise]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which can never be valid names.
        supported = [name for name in noise_classes if name is not None]
        raise ValueError(
            f"Unknown noise {noise!r}; expected None or one of {supported}"
        ) from None
    print(f"{noise_type} noise added")

    def compose(*leading):
        # Every pipeline ends with tensor conversion followed by the noise.
        return transforms.Compose(
            [*leading, transforms.ToTensor(), noise_type(mean, std)]
        )

    if dataset in ("CIFAR10", "CIFAR100"):
        transform_cifar = compose(
            transforms.Resize((32, 32)),
            transforms.RandomHorizontalFlip(),
        )
        if dataset == "CIFAR10":
            dataset_cls, num_classes = torchvision.datasets.CIFAR10, 10
        else:
            dataset_cls, num_classes = torchvision.datasets.CIFAR100, 100
        trainset = dataset_cls(
            root="./data", train=True, download=True, transform=transform_cifar
        )
        testset = dataset_cls(
            root="./data", train=False, download=True, transform=transform_cifar
        )
        return trainset, testset, 3, num_classes

    # Everything else is MNIST-based and single-channel.
    transform_mnist = compose(transforms.Resize((32, 32)))
    trainset = torchvision.datasets.MNIST(
        root="./data", train=True, download=True, transform=transform_mnist
    )
    testset = torchvision.datasets.MNIST(
        root="./data", train=False, download=True, transform=transform_mnist
    )
    inputs = 1
    if dataset == "MNIST":
        return trainset, testset, inputs, 10

    classes = split_mnist_classes[dataset]
    offset = classes[0]
    # The extracted split is wrapped with a pipeline that first converts
    # each raw sample to a PIL image.
    transform_split_mnist = compose(
        transforms.ToPILImage(),
        transforms.Resize((32, 32)),
    )
    train_data, train_targets = extract_classes(trainset, classes)
    test_data, test_targets = extract_classes(testset, classes)
    train_targets -= offset
    test_targets -= offset
    trainset = CustomDataset(
        train_data, train_targets, transform=transform_split_mnist
    )
    testset = CustomDataset(
        test_data, test_targets, transform=transform_split_mnist
    )
    return trainset, testset, inputs, len(classes)
def getDataloader(trainset, testset, valid_size, batch_size, num_workers):
    """Create train, validation and test loaders.

    The indices of ``trainset`` are shuffled with ``np.random.shuffle``; the
    first ``floor(valid_size * len(trainset))`` of them form the validation
    subset and the remainder the training subset. Both subsets are drawn
    from ``trainset`` through ``SubsetRandomSampler``. The test loader
    iterates ``testset`` without a sampler.

    Returns:
        ``(train_loader, valid_loader, test_loader)``.
    """
    total = len(trainset)
    shuffled = list(range(total))
    np.random.shuffle(shuffled)
    n_valid = int(np.floor(valid_size * total))
    valid_part = shuffled[:n_valid]
    train_part = shuffled[n_valid:]

    loader_cls = torch.utils.data.DataLoader
    common = {"batch_size": batch_size, "num_workers": num_workers}

    train_loader = loader_cls(
        trainset, sampler=SubsetRandomSampler(train_part), **common
    )
    valid_loader = loader_cls(
        trainset, sampler=SubsetRandomSampler(valid_part), **common
    )
    test_loader = loader_cls(testset, **common)
    return train_loader, valid_loader, test_loader

View File

@ -1,57 +1,54 @@
import os
import pickle
import re
import subprocess
from re import findall, sub
from subprocess import run
import pickle
import numpy as np
def get_sample_of_gpu():
no_graph = "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running."
no_version = "Failed to initialize NVML: Driver/library version mismatch"
smi_string = run(
["rocm-smi", "-P", "--showvoltage", "--showmemuse"], stdout=subprocess.PIPE
)
smi_string = smi_string.stdout.decode("utf-8")
smi_string = smi_string.split("\n")
smi_string = list(filter(lambda x: x, smi_string))
if smi_string[0] == no_graph:
raise Exception("It seems that no AMD GPU is installed")
elif smi_string[0] == no_version:
raise Exception("rocm-smi version mismatch")
else:
results = []
gpuW0 = findall(r"[0-9]*\.[0-9]*", smi_string[2])
gpuW1 = findall(r"[0-9]*\.[0-9]*", smi_string[3])
gpuM0 = findall(r"[0-9]+", smi_string[6])
gpuM1 = findall(r"[0-9]+", smi_string[10])
gpuV0 = findall(r"[0-9]+", smi_string[16])
gpuV1 = findall(r"[0-9]+", smi_string[17])
results.append(float(gpuW0[0]) + float(gpuW1[0]))
if len(gpuM0) == 2 and len(gpuM1) == 2:
results.append(int(gpuM0[1]) + int(gpuM1[1]))
elif len(gpuM0) == 2:
results.append(gpuM0[1])
elif len(gpuM1) == 2:
results.append(gpuM1[1])
results.append(int(gpuV0[1]) + int(gpuV1[1]))
return results
# for l in smi_string:
# temp = findall("[0-9]*MiB | [0-9]*W",l)
# if temp:
# return temp
from re import sub, findall
import subprocess
from subprocess import run
no_graph = "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running."
no_version = "Failed to initialize NVML: Driver/library version mismatch"
smi_string = run(['rocm-smi', '-P', '--showvoltage', '--showmemuse'], stdout=subprocess.PIPE)
smi_string = smi_string.stdout.decode('utf-8')
smi_string = smi_string.split("\n")
smi_string = list(filter(lambda x: x, smi_string))
if smi_string[0] == no_graph:
raise Exception("It seems that no AMD GPU is installed")
elif smi_string[0] == no_version:
raise Exception("rocm-smi version mismatch")
else:
results= []
gpuW0 = findall("[0-9]*\.[0-9]*",smi_string[2])
gpuW1 = findall("[0-9]*\.[0-9]*",smi_string[4])
gpuM0 = findall("[0-9]+",smi_string[7])
gpuM1 = findall("[0-9]+",smi_string[9])
gpuV0 = findall("[0-9]+",smi_string[13])
gpuV1 = findall("[0-9]+",smi_string[14])
results.append(float(gpuW0[0]) + float(gpuW1[0]))
if len(gpuM0) == 2 and len(gpuM1) == 2:
results.append(int(gpuM0[1]) + int(gpuM1[1]))
elif len(gpuM0) == 2:
results.append(gpuM0[1])
elif len(gpuM1) == 2:
results.append(gpuM1[1])
results.append(int(gpuV0[1]) + int(gpuV1[1]))
return results
#for l in smi_string:
#temp = findall("[0-9]*MiB | [0-9]*W",l)
#if temp:
#return temp
def total_watt_consumed(pickle_name):
with open(pickle_name, "rb") as file:
while True:
try:
x = pickle.load(file)
except EOFError:
break
x = np.array(x)
x = x[:, 0]
# y = [float(re.findall(r"\d+.\d+", xi)[0]) for xi in x]
return sum(x)
with (open(pickle_name, "rb")) as file:
while True:
try:
x = pickle.load(file)
except EOFError:
break
x = np.array(x)
x = x[:,0]
y = [float(re.findall("\d+.\d+",xi)[0]) for xi in x]
return sum(y)

View File

@ -1,5 +0,0 @@
from .BBB.BBBConv import BBBConv2d as BBB_Conv2d
from .BBB.BBBLinear import BBBLinear as BBB_Linear
from .BBB_LRT.BBBConv import BBBConv2d as BBB_LRT_Conv2d
from .BBB_LRT.BBBLinear import BBBLinear as BBB_LRT_Linear
from .misc import FlattenLayer, ModuleWrapper

226
main_bayesian.py Executable file → Normal file
View File

@ -1,28 +1,55 @@
from __future__ import print_function
import os
import pickle
from datetime import datetime
import numpy as np
import data
import utils
import torch
# import pickle
import metrics
import numpy as np
from datetime import datetime
from torch.nn import functional as F
from torch.optim import Adam, lr_scheduler
import data
import metrics
import utils
from models.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC
from models.BayesianModels.BayesianAlexNet import BBBAlexNet
from models.BayesianModels.BayesianLeNet import BBBLeNet
from stopping_crit import accuracy_bound, e_stop, efficiency_stop, energy_bound
from models.BayesianModels.BayesianAlexNet import BBBAlexNet
from models.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC
from stopping_crit import earlyStopping, energyBound, accuracyBound
with open("configuration.pkl", "rb") as file:
while True:
try:
cfg = pickle.load(file)
except EOFError:
break
# with (open("configuration.pkl", "rb")) as file:
# while True:
# try:
# cfg = pickle.load(file)
# except EOFError:
# break
cfg = {
"model": {"net_type": "lenet", "type": "bayes", "size": 1,
"layer_type": "lrt", "activation_type": "softplus",
"priors": {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1), # (mean,std) normal_
'posterior_rho_initial': (-5, 0.1), # (mean,std) normal_
},
"n_epochs": 100,
"sens": 1e-9,
"energy_thrs": 100000,
"acc_thrs": 0.99,
"lr": 0.001,
"num_workers": 4,
"valid_size": 0.2,
"batch_size": 256,
"train_ens": 1,
"valid_ens": 1,
"beta_type": 0.1, # 'Blundell','Standard',etc.
# Use float for const value
},
#"data": "CIFAR10",
"data": "MNIST",
"stopping_crit": 1,
"save": 1,
"pickle_path": None,
}
# CUDA settings
@ -30,36 +57,21 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def getModel(net_type, inputs, outputs, priors, layer_type, activation_type):
if net_type == "lenet":
return BBBLeNet(
outputs,
inputs,
priors,
layer_type,
activation_type,
wide=cfg["model"]["size"],
)
elif net_type == "alexnet":
if (net_type == 'lenet'):
return BBBLeNet(outputs, inputs, priors, layer_type, activation_type,
wide=cfg["model"]["size"])
elif (net_type == 'alexnet'):
return BBBAlexNet(outputs, inputs, priors, layer_type, activation_type)
elif net_type == "3conv3fc":
return BBB3Conv3FC(outputs, inputs, priors, layer_type, activation_type)
elif (net_type == '3conv3fc'):
return BBB3Conv3FC(outputs, inputs, priors, layer_type,
activation_type)
else:
raise ValueError(
"Network should be either [LeNet / AlexNet\
/ 3Conv3FC"
)
raise ValueError('Network should be either [LeNet / AlexNet\
/ 3Conv3FC')
def train_model(
net,
optimizer,
criterion,
trainloader,
num_ens=1,
beta_type=0.1,
epoch=None,
num_epochs=None,
):
def train_model(net, optimizer, criterion, trainloader, num_ens=1,
beta_type=0.1, epoch=None, num_epochs=None):
net.train()
training_loss = 0.0
accs = []
@ -69,7 +81,8 @@ def train_model(
optimizer.zero_grad()
inputs, labels = inputs.to(device), labels.to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes,
num_ens).to(device)
kl = 0.0
for j in range(num_ens):
@ -81,17 +94,19 @@ def train_model(
kl_list.append(kl.item())
log_outputs = utils.logmeanexp(outputs, dim=2)
beta = metrics.get_beta(i - 1, len(trainloader), beta_type, epoch, num_epochs)
beta = metrics.get_beta(i-1, len(trainloader), beta_type,
epoch, num_epochs)
loss = criterion(log_outputs, labels, kl, beta)
loss.backward()
optimizer.step()
accs.append(metrics.acc(log_outputs.data, labels))
training_loss += loss.cpu().data.numpy()
return training_loss / len(trainloader), np.mean(accs), np.mean(kl_list)
return training_loss/len(trainloader), np.mean(accs), np.mean(kl_list)
def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=None, num_epochs=None):
def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1,
epoch=None, num_epochs=None):
"""Calculate ensemble accuracy and NLL Loss"""
net.train()
valid_loss = 0.0
@ -99,7 +114,8 @@ def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=
for i, (inputs, labels) in enumerate(validloader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes,
num_ens).to(device)
kl = 0.0
for j in range(num_ens):
net_out, _kl = net(inputs)
@ -108,18 +124,16 @@ def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=
log_outputs = utils.logmeanexp(outputs, dim=2)
beta = metrics.get_beta(i - 1, len(validloader), beta_type, epoch, num_epochs)
beta = metrics.get_beta(i-1, len(validloader), beta_type,
epoch, num_epochs)
valid_loss += criterion(log_outputs, labels, kl, beta).item()
accs.append(metrics.acc(log_outputs, labels))
return valid_loss / len(validloader), np.mean(accs)
return valid_loss/len(validloader), np.mean(accs)
def run(dataset, net_type):
# Noise applied to dataset
noise_type = cfg["noise_type"]
mean = 0.5
std = 0.5
# Hyper Parameter settings
layer_type = cfg["model"]["layer_type"]
activation_type = cfg["model"]["activation_type"]
@ -134,13 +148,14 @@ def run(dataset, net_type):
batch_size = cfg["model"]["batch_size"]
beta_type = cfg["model"]["beta_type"]
trainset, testset, inputs, outputs = data.getDataset(dataset, noise_type, mean=mean, std=std)
trainset, testset, inputs, outputs = data.getDataset(dataset)
train_loader, valid_loader, test_loader = data.getDataloader(
trainset, testset, valid_size, batch_size, num_workers
)
net = getModel(net_type, inputs, outputs, priors, layer_type, activation_type).to(device)
trainset, testset, valid_size, batch_size, num_workers)
net = getModel(net_type, inputs, outputs, priors, layer_type,
activation_type).to(device)
ckpt_dir = f"checkpoints/{dataset}/bayesian"
ckpt_dir = f'checkpoints/{dataset}/bayesian'
ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}_{activation_type}_{cfg["model"]["size"]}'
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir, exist_ok=True)
@ -150,80 +165,71 @@ def run(dataset, net_type):
criterion = metrics.ELBO(len(trainset)).to(device)
optimizer = Adam(net.parameters(), lr=lr_start)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, verbose=True)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6,
verbose=True)
# valid_loss_max = np.Inf
# if stp == 2:
early_stop = []
train_data = []
for epoch in range(n_epochs): # loop over the dataset multiple times
train_loss, train_acc, train_kl = train_model(
net,
optimizer,
criterion,
train_loader,
num_ens=train_ens,
beta_type=beta_type,
epoch=epoch,
num_epochs=n_epochs,
)
valid_loss, valid_acc = validate_model(
net,
criterion,
valid_loader,
num_ens=valid_ens,
beta_type=beta_type,
epoch=epoch,
num_epochs=n_epochs,
)
train_loss, train_acc, train_kl = train_model(net, optimizer,
criterion,
train_loader,
num_ens=train_ens,
beta_type=beta_type,
epoch=epoch,
num_epochs=n_epochs)
valid_loss, valid_acc = validate_model(net, criterion, valid_loader,
num_ens=valid_ens,
beta_type=beta_type,
epoch=epoch,
num_epochs=n_epochs)
lr_sched.step(valid_loss)
train_data.append([epoch, train_loss, train_acc, valid_loss, valid_acc])
print(
"Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy:\
train_data.append([epoch, train_loss, train_acc, valid_loss,
valid_acc])
print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy:\
{:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy:\
{:.4f} \ttrain_kl_div: {:.4f}".format(
epoch, train_loss, train_acc, valid_loss, valid_acc, train_kl
)
)
ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}_{activation_type}_{cfg["model"]["size"]}_epoch_{epoch}_noise_{noise_type}.pt'
if sav == 1:
torch.save(net.state_dict(), ckpt_name)
{:.4f} \ttrain_kl_div: {:.4f}'.format(epoch, train_loss,
train_acc, valid_loss,
valid_acc, train_kl))
if stp == 2:
# print("Using early stopping")
if e_stop(early_stop, valid_acc, epoch + 1, 2, cfg["model"]["sens"]) == 1:
print('Using early stopping')
if earlyStopping(early_stop, valid_acc, epoch,
cfg["model"]["sens"]) == 1:
break
elif stp == 3:
# print("Using energy bound")
if energy_bound(cfg["model"]["energy_thrs"]) == 1:
print('Using energy bound')
if energyBound(cfg["model"]["energy_thrs"]) == 1:
break
elif stp == 4:
if dataset == "MNIST":
# print("Using accuracy bound")
if accuracy_bound(train_acc, 0.99) == 1:
break
else:
# print("Using accuracy bound")
if accuracy_bound(train_acc, 0.50) == 1:
break
elif stp == 5:
# print("Using efficiency stoping")
if efficiency_stop(net, train_acc, batch_size, 0.002) == 1:
print('Using accuracy bound')
if accuracyBound(train_acc, cfg["model"]["acc_thrs"]) == 1:
break
else:
print(f"Training for {cfg['model']['n_epochs']} epochs")
print('Training for {} epochs'.format(cfg["model"]["n_epochs"]))
with open("bayes_exp_data_" + str(cfg["model"]["size"]) + f"_{dataset}" + ".pkl", "wb") as f:
pickle.dump(train_data, f)
if sav == 1:
# save model when finished
# if epoch == cfg["model"]["n_epochs"]-1:
torch.save({
'epoch': epoch,
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': train_loss,
}, ckpt_name + '_epoch_{}.pt'.format(epoch))
# with open("bayes_exp_data_"+str(cfg["model"]["size"])+".pkl", 'wb') as f:
# pickle.dump(train_data, f)
if __name__ == "__main__":
if __name__ == '__main__':
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Initial Time =", current_time)
print(f"Using bayesian model of size: {cfg["model"]["size"]}")
print("Using bayesian model of size: {}".format(cfg["model"]["size"]))
run(cfg["data"], cfg["model"]["net_type"])
now = datetime.now()
current_time = now.strftime("%H:%M:%S")

156
main_frequentist.py Executable file → Normal file
View File

@ -1,44 +1,68 @@
from __future__ import print_function
import os
import pickle
from datetime import datetime
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam, lr_scheduler
import data
import torch
# import pickle
import metrics
from models.NonBayesianModels.AlexNet import AlexNet
import numpy as np
import torch.nn as nn
from datetime import datetime
from torch.optim import Adam, lr_scheduler
from models.NonBayesianModels.LeNet import LeNet
from models.NonBayesianModels.AlexNet import AlexNet
from stopping_crit import earlyStopping, energyBound, accuracyBound
from models.NonBayesianModels.ThreeConvThreeFC import ThreeConvThreeFC
from stopping_crit import accuracy_bound, e_stop, efficiency_stop, energy_bound
with open("configuration.pkl", "rb") as file:
while True:
try:
cfg = pickle.load(file)
except EOFError:
break
# with (open("configuration.pkl", "rb")) as file:
# while True:
# try:
# cfg = pickle.load(file)
# except EOFError:
# break
cfg = {
"model": {"net_type": "lenet", "type": "freq", "size": 1,
"layer_type": "lrt", "activation_type": "softplus",
"priors": {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1), # (mean,std) normal_
'posterior_rho_initial': (-5, 0.1), # (mean,std) normal_
},
"n_epochs": 100,
"sens": 1e-9,
"energy_thrs": 100000,
"acc_thrs": 0.99,
"lr": 0.001,
"num_workers": 4,
"valid_size": 0.2,
"batch_size": 256,
"train_ens": 1,
"valid_ens": 1,
"beta_type": 0.1, # 'Blundell','Standard',etc.
# Use float for const value
},
#"data": "CIFAR10",
"data": "MNIST",
"stopping_crit": 1,
"save": 1,
"pickle_path": None,
}
# CUDA settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def getModel(net_type, inputs, outputs, wide=cfg["model"]["size"]):
if net_type == "lenet":
if (net_type == 'lenet'):
return LeNet(outputs, inputs, wide)
elif net_type == "alexnet":
elif (net_type == 'alexnet'):
return AlexNet(outputs, inputs)
elif net_type == "3conv3fc":
elif (net_type == '3conv3fc'):
return ThreeConvThreeFC(outputs, inputs)
else:
raise ValueError(
"Network should be either [LeNet / AlexNet / \
3Conv3FC"
)
raise ValueError('Network should be either [LeNet / AlexNet / \
3Conv3FC')
def train_model(net, optimizer, criterion, train_loader):
@ -52,7 +76,7 @@ def train_model(net, optimizer, criterion, train_loader):
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item() * data.size(0)
train_loss += loss.item()*data.size(0)
accs.append(metrics.acc(output.detach(), target))
return train_loss, np.mean(accs)
@ -65,16 +89,13 @@ def validate_model(net, criterion, valid_loader):
data, target = datas.to(device), target.to(device)
output = net(data)
loss = criterion(output, target)
valid_loss += loss.item() * data.size(0)
valid_loss += loss.item()*data.size(0)
accs.append(metrics.acc(output.detach(), target))
return valid_loss, np.mean(accs)
def run(dataset, net_type):
# Noise on dataset
noise_type = None
mean = 0.5
std = 1
# Hyper Parameter settings
n_epochs = cfg["model"]["n_epochs"]
lr = cfg["model"]["lr"]
@ -82,13 +103,13 @@ def run(dataset, net_type):
valid_size = cfg["model"]["valid_size"]
batch_size = cfg["model"]["batch_size"]
trainset, testset, inputs, outputs = data.getDataset(dataset, noise_type, mean=mean, std=std)
trainset, testset, inputs, outputs = data.getDataset(dataset)
train_loader, valid_loader, test_loader = data.getDataloader(
trainset, testset, valid_size, batch_size, num_workers
)
trainset, testset, valid_size, batch_size, num_workers)
net = getModel(net_type, inputs, outputs).to(device)
ckpt_dir = f"checkpoints/{dataset}/frequentist"
ckpt_dir = f'checkpoints/{dataset}/frequentist'
ckpt_name = f'checkpoints/{dataset}/frequentist/model_{net_type}_{cfg["model"]["size"]}'
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir, exist_ok=True)
@ -98,62 +119,61 @@ def run(dataset, net_type):
criterion = nn.CrossEntropyLoss()
optimizer = Adam(net.parameters(), lr=lr)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, verbose=True)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6,
verbose=True)
# valid_loss_min = np.Inf
# if stp == 2:
early_stop = []
train_data = []
for epoch in range(1, n_epochs + 1):
for epoch in range(1, n_epochs+1):
train_loss, train_acc = train_model(net, optimizer, criterion, train_loader)
train_loss, train_acc = train_model(net, optimizer, criterion,
train_loader)
valid_loss, valid_acc = validate_model(net, criterion, valid_loader)
lr_sched.step(valid_loss)
train_loss = train_loss / len(train_loader.dataset)
valid_loss = valid_loss / len(valid_loader.dataset)
train_loss = train_loss/len(train_loader.dataset)
valid_loss = valid_loss/len(valid_loader.dataset)
train_data.append([epoch, train_loss, train_acc, valid_loss, valid_acc])
print(
"Epoch: {} \tTraining Loss: {: .4f} \tTraining Accuracy: {: .4f}\
train_data.append([epoch, train_loss, train_acc, valid_loss,
valid_acc])
print('Epoch: {} \tTraining Loss: {: .4f} \tTraining Accuracy: {: .4f}\
\tValidation Loss: {: .4f} \tValidation Accuracy: {: .4f}\
".format(
epoch, train_loss, train_acc, valid_loss, valid_acc
)
)
ckpt_name = f'checkpoints/{dataset}/frequentist/model_{net_type}_{cfg["model"]["size"]}_epoch_{epoch}_noise_{noise_type}.pt'
if sav == 1:
torch.save(net.state_dict(), ckpt_name)
'.format(epoch, train_loss, train_acc, valid_loss, valid_acc))
if stp == 2:
# print("Using early stopping")
if e_stop(early_stop, valid_acc, epoch, 2, cfg["model"]["sens"]) == 1:
# print('Using early stopping')
if earlyStopping(early_stop, valid_acc, epoch,
cfg["model"]["sens"]) == 1:
break
elif stp == 3:
# print('Using energy bound')
if energy_bound(cfg["model"]["energy_thrs"]) == 1:
if energyBound(cfg["model"]["energy_thrs"]) == 1:
break
elif stp == 4:
if dataset == "MNIST":
# print("Using accuracy bound")
if accuracy_bound(train_acc, 0.99) == 1:
break
else:
# print("Using accuracy bound")
if accuracy_bound(train_acc, 0.50) == 1:
break
elif stp == 5:
# print("Using efficiency bound")
if efficiency_stop(net, train_acc, batch_size, 0.25) == 1:
# print('Using accuracy bound')
if accuracyBound(train_acc,
cfg["model"]["acc_thrs"]) == 1:
break
else:
print("Training for {} epochs".format(cfg["model"]["n_epochs"]))
print('Training for {} epochs'.format(cfg["model"]["n_epochs"]))
with open("freq_exp_data_" + str(cfg["model"]["size"]) + f"_{dataset}" + ".pkl", "wb") as f:
pickle.dump(train_data, f)
if sav == 1:
# save model when finished
# if epoch == n_epochs:
# torch.save(net.state_dict(), ckpt_name)
torch.save({
'epoch': epoch,
'model_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': train_loss,
}, ckpt_name + '_epoch_{}.pt'.format(epoch))
# with open("freq_exp_data_"+str(cfg["model"]["size"])+".pkl", 'wb') as f:
# pickle.dump(train_data, f)
if __name__ == "__main__":
if __name__ == '__main__':
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Initial Time =", current_time)

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
#!/bin/env bash
while true
do

View File

@ -1,16 +1,8 @@
import math
import torch.nn as nn
from layers import (
BBB_Conv2d,
BBB_Linear,
BBB_LRT_Conv2d,
BBB_LRT_Linear,
FlattenLayer,
ModuleWrapper,
)
from layers import BBB_Linear, BBB_Conv2d
from layers import BBB_LRT_Linear, BBB_LRT_Conv2d
from layers import FlattenLayer, ModuleWrapper
class BBB3Conv3FC(ModuleWrapper):
"""
@ -18,28 +10,25 @@ class BBB3Conv3FC(ModuleWrapper):
Simple Neural Network having 3 Convolution
and 3 FC layers with Bayesian layers.
"""
def __init__(
self, outputs, inputs, priors, layer_type="lrt", activation_type="softplus"
):
def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus'):
super(BBB3Conv3FC, self).__init__()
self.num_classes = outputs
self.layer_type = layer_type
self.priors = priors
if layer_type == "lrt":
if layer_type=='lrt':
BBBLinear = BBB_LRT_Linear
BBBConv2d = BBB_LRT_Conv2d
elif layer_type == "bbb":
elif layer_type=='bbb':
BBBLinear = BBB_Linear
BBBConv2d = BBB_Conv2d
else:
raise ValueError("Undefined layer_type")
if activation_type == "softplus":
if activation_type=='softplus':
self.act = nn.Softplus
elif activation_type == "relu":
elif activation_type=='relu':
self.act = nn.ReLU
else:
raise ValueError("Only softplus or relu supported")

View File

@ -1,9 +0,0 @@
[tool.pylint.'FORMAT']
max-line-length = 110
[tool.pylint.'MESSAGES CONTROL']
disable = ["missing-module-docstring", "missing-function-docstring", "import-error"]
[tool.black]
line-length = 110

View File

@ -1,3 +1,3 @@
#!/usr/bin/env bash
#!/bin/env bash
radeontop -b 08 -d - > $1

View File

@ -1,17 +1,15 @@
import pickle
gpu_data = []
with open("configuration.pkl", "rb") as openfile:
with (open("bayesian_wattdata_3.pkl", "rb")) as openfile:
while True:
try:
gpu_data = pickle.load(openfile)
except EOFError:
break
print(gpu_data)
# exp_data = []
# with (open("bayes_exp_data_6.pkl", "rb")) as openfile:
#exp_data = []
#with (open("bayes_exp_data_6.pkl", "rb")) as openfile:
# while True:
# try:
# exp_data = pickle.load(openfile)

View File

@ -1,12 +1,9 @@
import pickle
# from pathlib import Path
import subprocess as sub
from time import sleep
import psutil
import pickle
import arguments
from time import sleep
#from pathlib import Path
import subprocess as sub
from arguments import makeArguments
@ -18,36 +15,30 @@ def kill(proc_pid):
cfg = {
"model": {
"net_type": None,
"type": None,
"size": None,
"layer_type": "lrt",
"activation_type": "softplus",
"priors": {
"prior_mu": 0,
"prior_sigma": 0.1,
"posterior_mu_initial": (0, 0.1), # (mean,std) normal_
"posterior_rho_initial": (-5, 0.1), # (mean,std) normal_
},
"n_epochs": 100,
"sens": 1e-9,
"energy_thrs": 100000,
"acc_thrs": 0.99,
"lr": 0.001,
"num_workers": 4,
"valid_size": 0.2,
"batch_size": 256,
"train_ens": 1,
"valid_ens": 1,
"beta_type": 0.1, # 'Blundell','Standard',etc.
# Use float for const value
},
"data": None,
"noise_type": None,
"stopping_crit": None,
"save": None,
"pickle_path": None,
"model": {"net_type": None, "type": None, "size": None, "layer_type":
"lrt", "activation_type": "softplus", "priors": {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1), # (mean,std) normal_
'posterior_rho_initial': (-5, 0.1), # (mean,std) normal_
},
"n_epochs": 100,
"sens": 1e-9,
"energy_thrs": 100000,
"acc_thrs": 0.99,
"lr": 0.001,
"num_workers": 4,
"valid_size": 0.2,
"batch_size": 256,
"train_ens": 1,
"valid_ens": 1,
"beta_type": 0.1, # 'Blundell','Standard',etc.
# Use float for const value
},
"data": None,
"stopping_crit": None,
"save": None,
"pickle_path": None,
}
args = makeArguments(arguments.all_args)
@ -56,10 +47,10 @@ check = list(args.values())
if all(v is None for v in check):
raise Exception("One argument required")
elif None in check:
if args["f"] is not None:
if args['f'] is not None:
cmd = ["python", "main_frequentist.py"]
cfg["model"]["type"] = "freq"
elif args["b"] is not None:
elif args['b'] is not None:
cmd = ["python", "main_bayesian.py"]
cfg["model"]["type"] = "bayes"
else:
@ -70,30 +61,26 @@ wide = args["f"] or args["b"]
cfg["model"]["size"] = wide
cfg["data"] = args["dataset"]
cfg["noise_type"] = args["noise_type"]
cfg["model"]["net_type"] = args["net_type"]
if args["EarlyStopping"]:
if args['EarlyStopping']:
cfg["stopping_crit"] = 2
elif args["EnergyBound"]:
elif args['EnergyBound']:
cfg["stopping_crit"] = 3
elif args["AccuracyBound"]:
elif args['AccuracyBound']:
cfg["stopping_crit"] = 4
elif args["EfficiencyStopping"]:
cfg["stopping_crit"] = 5
else:
cfg["stopping_crit"] = 1
if args["Save"]:
if args['Save']:
cfg["save"] = 1
else:
cfg["save"] = 0
cfg["pickle_path"] = (
f"{cfg['model']['type']}_wattdata_{cfg['model']['size']}_{cfg['data']}.pkl"
)
cfg["pickle_path"] = "{}_wattdata_{}.pkl".format(cfg["model"]["type"],
cfg["model"]["size"])
with open("configuration.pkl", "wb") as f:
pickle.dump(cfg, f)
@ -107,35 +94,34 @@ cpu_watt = "cpu_watt.sh"
ram = "mem_free.sh"
gpu = "radeontop.sh"
# path_cpu_watt = Path(cpu_watt)
# path_ram = Path(ram)
# path_gpu = Path(gpu)
#path_cpu_watt = Path(cpu_watt)
#path_ram = Path(ram)
#path_gpu = Path(gpu)
# path_cpu_watt = str(Path(cpu_watt).absolute()) + '/' + cpu_watt
# path_ram = str(Path(ram).absolute()) + '/' + ram
# path_gpu = str(Path(gpu).absolute()) + '/' + gpu
#path_cpu_watt = str(Path(cpu_watt).absolute()) + '/' + cpu_watt
#path_ram = str(Path(ram).absolute()) + '/' + ram
#path_gpu = str(Path(gpu).absolute()) + '/' + gpu
if cmd[1] == "main_frequentist.py":
cmd2 = ["./" + cpu_watt, f"freq_{wide}_cpu_watts_{cfg['data']}"]
cmd3 = ["./" + ram, f"freq_{wide}_ram_use_{cfg['data']}"]
cmd4 = ["./" + gpu, f"freq_{wide}_flop_app_{cfg['data']}"]
cmd2 = ['./'+cpu_watt, "freq_{}_cpu_watts".format(wide)]
cmd3 = ['./'+ram, "freq_{}_ram_use".format(wide)]
cmd4 = ['./'+gpu, "freq_{}_flop_app".format(wide)]
elif cmd[1] == "main_bayesian.py":
cmd2 = ["./" + cpu_watt, f"bayes_{wide}_cpu_watts_{cfg['data']}"]
cmd3 = ["./" + ram, f"bayes_{wide}_ram_use_{cfg['data']}"]
cmd4 = ["./" + gpu, f"bayes_{wide}_flop_app_{cfg['data']}"]
cmd2 = ['./'+cpu_watt, "bayes_{}_cpu_watts".format(wide)]
cmd3 = ['./'+ram, "bayes_{}_ram_use".format(wide)]
cmd4 = ['./'+gpu, "bayes_{}_flop_app".format(wide)]
path = sub.check_output(["pwd"])
path = sub.check_output(['pwd'])
path = path.decode()
path = path.replace("\n", "")
path = path.replace('\n', '')
startWattCounter = "python " + path + "/amd_sample_draw.py"
startWattCounter = 'python ' + path + '/amd_sample_draw.py'
p1 = sub.Popen(cmd)
p2 = sub.Popen(
startWattCounter.split(), stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE
)
p2 = sub.Popen(startWattCounter.split(), stdin=sub.PIPE, stdout=sub.PIPE,
stderr=sub.PIPE)
p3 = sub.Popen(cmd2, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE)
p4 = sub.Popen(cmd3, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE)
p5 = sub.Popen(cmd4, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE)

View File

@ -1,128 +1,44 @@
# import math
import pickle
from time import sleep
from gpu_power_func import total_watt_consumed
with open("configuration.pkl", "rb") as file:
with (open("configuration.pkl", "rb")) as file:
while True:
try:
cfg = pickle.load(file)
except EOFError:
break
def non_decreasing(lst: list):
    """
    Check that a list is non-decreasing.

    Returns True when every element is less than or equal to the element
    that follows it. A list with fewer than two elements has no adjacent
    pairs, so the result is True.
    """
    # Pair each element with its successor and compare.
    return all(x <= y for x, y in zip(lst, lst[1:]))
def non_increasing(lst):
    """
    Check that a list is non-increasing.

    Returns True when every element is greater than or equal to the element
    that follows it. A list with fewer than two elements has no adjacent
    pairs, so the result is True.
    """
    # Pair each element with its successor and compare.
    return all(x >= y for x, y in zip(lst, lst[1:]))
def monotonic(lst):
    """
    Check that a list is monotonic.

    Returns True when the list is either non-decreasing or non-increasing.
    """
    return non_decreasing(lst) or non_increasing(lst)
def strictly_increasing(lst):
    """
    Check that a list is strictly increasing.

    Returns True when every element is strictly less than the element that
    follows it. A list with fewer than two elements has no adjacent pairs,
    so the result is True.
    """
    # Pair each element with its successor and compare.
    return all(x < y for x, y in zip(lst, lst[1:]))
def strictly_decreasing(lst):
    """
    Check that a list is strictly decreasing.

    Returns True when every element is strictly greater than the element
    that follows it. A list with fewer than two elements has no adjacent
    pairs, so the result is True.
    """
    # Pair each element with its successor and compare.
    return all(x > y for x, y in zip(lst, lst[1:]))
def strictly_monotonic(lst):
    """
    Check that a list is strictly monotonic.

    Returns True when the list is either strictly increasing or strictly
    decreasing.
    """
    return strictly_increasing(lst) or strictly_decreasing(lst)
def count_parameters(model):
    """
    Count the trainable parameters of a model.

    Iterates over ``model.parameters()`` and sums ``numel()`` for every
    parameter whose ``requires_grad`` flag is truthy.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
def efficiency_stop(model, accuracy, batch, sensitivity=0.001):
    """
    Decide whether training should stop based on accuracy per unit of energy.

    The consumed energy is read through ``total_watt_consumed`` using the
    path stored in ``cfg["pickle_path"]``. Efficiency is ``accuracy / energy``;
    it is scaled by the number of trainable parameters and divided by half
    the batch size before being compared with ``sensitivity``.

    Args:
        model: object exposing ``parameters()``; its trainable parameters
            are counted with ``count_parameters``.
        accuracy: current accuracy value; must be at least 0.5 to stop.
        batch: batch size used in the scaling term ``batch / 2``.
        sensitivity: threshold the scaled efficiency has to reach.

    Returns:
        1 when the stop condition holds, otherwise 0.
    """
    pickle_path = cfg["pickle_path"]
    try:
        energy = total_watt_consumed(pickle_path)
    except Exception:
        # NOTE(review): presumably the measurement file is still being
        # written by the sampling process; wait and retry once. A second
        # failure propagates to the caller, as before.
        sleep(3)
        energy = total_watt_consumed(pickle_path)

    # No energy recorded yet: efficiency is undefined, so keep training
    # instead of failing with ZeroDivisionError.
    if energy == 0:
        return 0

    efficiency = accuracy / energy
    # NOTE(review): the printed value is 1 - efficiency, not the efficiency
    # itself; the message is kept unchanged.
    print(f"Current Efficiency: {1 - efficiency}")
    no_parameters = count_parameters(model)
    if (efficiency * no_parameters / (batch / 2) >= sensitivity) and (accuracy >= 0.5):
        return 1
    return 0
def e_stop(
early_stopping: list,
train_acc: float,
epoch: int,
patience: int = 4,
sensitivity: float = 1e-9,
):
"""
This function stops training early
"""
def earlyStopping(early_stopping: list, train_acc: float, epoch: int, sensitivity: float=1e-9):
early_stopping.append(train_acc)
if patience in (0, 1):
print("Stopping Early")
return 1
if epoch % patience == 0 and epoch > 0:
early_stopping = early_stopping[-patience : len(early_stopping)]
ini = early_stopping.pop(0)
early_stopping = list(map(lambda x: x - sensitivity, early_stopping))
early_stopping.insert(0, ini)
values = ""
for i, v in enumerate(early_stopping):
values += f"Value {i+1}: {v} > "
print(values)
if (train_acc > 0.5) and not strictly_increasing(early_stopping):
print("Stopping Early")
return 1
if epoch % 4 == 0 and epoch > 0:
print("Value 1: {} > Value 2: {} > \
Value 3: {}".format(early_stopping[0], \
abs(early_stopping[1]-sensitivity), \
abs(early_stopping[2]-sensitivity)))
if train_acc > 0.5:
if early_stopping[0] > abs(early_stopping[1]-sensitivity) and \
early_stopping[1] > abs(early_stopping[2]-sensitivity):
print("Stopping Early")
return 1
del early_stopping[:]
return 0
def energy_bound(threshold: float = 100000.0):
"""Stops training when a specified amount of energy is consumed"""
def energyBound(threshold: float=100000.0):
try:
energy = total_watt_consumed(cfg["pickle_path"])
except Exception as e:
sleep(3)
energy = total_watt_consumed(cfg["pickle_path"])
print(f"Energy used: {energy}")
print("Energy used: {}".format(energy))
if energy > threshold:
print("Energy bound achieved")
return 1
return 0
def accuracy_bound(train_acc: float, threshold: float = 0.99):
"""Stops training when a specified amount of accuracy is achieved"""
def accuracyBound(train_acc: float, threshold: float=0.99):
if train_acc >= threshold:
print("Accuracy bound achieved")
return 1