From c8d8253d2424757c069620d24a48707698c8e3ac Mon Sep 17 00:00:00 2001
From: Eduardo Cueto-Mendoza
Date: Wed, 29 Jan 2025 11:26:17 +0000
Subject: [PATCH] Modify stopping criteria and add new data files (previously
 ignored)

---
 .gitignore          |   7 +-
 arguments.py        |  13 ++
 data/__init__.py    |   1 +
 data/data.py        | 405 ++++++++++++++++++++++++++++++++++++++++++++
 gpu_power_func.py   |   5 +-
 layers/__init__.py  |   5 +
 main_bayesian.py    |  46 ++---
 main_frequentist.py |  35 ++--
 pyproject.toml      |   9 +
 read_pickle.py      |   8 +-
 run_service.py      | 120 +++++++------
 stopping_crit.py    |  72 ++++++--
 12 files changed, 617 insertions(+), 109 deletions(-)
 create mode 100755 data/__init__.py
 create mode 100755 data/data.py
 create mode 100755 layers/__init__.py
 create mode 100644 pyproject.toml

diff --git a/.gitignore b/.gitignore
index 74faa9d..8d93728 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,22 @@
 **/**/__pycache__/
-**/**/__init__.py
 **/__pycache__/
-**/__init__.py
 checkpoints/
 __pycache__/
 data_budget/
 bayes_*
 times_*
+data/cifar-10-batches-py/
+data/MNIST/
 .vscode
 freq_*
-data/
 .idea
 *.pkl
 *.txt
+*.tar.gz
 stp
 sav
 bay
 frq
 sav
 tmp
+*_DATA
diff --git a/arguments.py b/arguments.py
index d55de32..cf2b211 100755
--- a/arguments.py
+++ b/arguments.py
@@ -34,10 +34,23 @@ def makeArguments(arguments: ArgumentParser) -> dict:
     all_args.add_argument(
         "-a", "--AccuracyBound", action="store_true", help="Accuracy Bound criteria"
     )
+    all_args.add_argument(
+        "-x",
+        "--EfficiencyStopping",
+        action="store_true",
+        help="Efficiency Stopping criteria",
+    )
     all_args.add_argument("-s", "--Save", action="store_true", help="Save model")
     all_args.add_argument(
         "--net_type", default="lenet", type=str, help="model = [lenet/AlexNet/3Conv3FC]"
     )
+    all_args.add_argument(
+        "-N",
+        "--noise_type",
+        default=None,
+        type=str,
+        help="noise = [Gaussian(m,s)/Raleigh(a,b)/Erlang(a,b)/Exponential(a)/Uniform(a,b)/Impulse(a)]",
+    )
     all_args.add_argument(
         "--dataset",
         default="CIFAR10",
diff --git a/data/__init__.py b/data/__init__.py
new file mode 100755
index 0000000..ba6e3f8
--- /dev/null
+++ b/data/__init__.py
@@ -0,0 +1 @@
+from .data import getDataloader, getDataset
diff --git a/data/data.py b/data/data.py
new file mode 100755
index 0000000..8077da2
--- /dev/null
+++ b/data/data.py
@@ -0,0 +1,405 @@
+import random
+
+import numpy as np
+import torch
+import torchvision
+import torchvision.transforms as transforms
+from torch.utils.data import Dataset
+from torch.utils.data.sampler import SubsetRandomSampler
+
+
+class AddNoNoise(object):
+    def __init__(self, mean=0.0, std=1.0):
+        self.std = std
+        self.mean = mean
+
+    def __call__(self, tensor):
+        return tensor
+
+    def __repr__(self):
+        return self.__class__.__name__ + " (no noise)"
+
+
+class AddGaussianNoise(object):
+    def __init__(self, mean=0.0, std=1.0):
+        self.std = std
+        self.mean = mean
+
+    def __call__(self, tensor):
+        return tensor + torch.randn(tensor.size()) * self.std + self.mean
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class AddRaleighNoise(object):
+    def __init__(self, a=0.0, b=0.0):
+        self.std = (b * (4 - np.pi)) / 4
+        self.mean = a + np.sqrt((np.pi * b) / 4)
+
+    def __call__(self, tensor):
+        return tensor + torch.randn(tensor.size()) * self.std + self.mean
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class AddErlangNoise(object):
+    def __init__(self, a=0.0, b=0.0):
+        if a == 0.0:
+            self.std = 0.0
+            self.mean = 0.0
+        else:
+            self.std = b / a
+            self.mean = b / (2 * a)
+
+    def __call__(self, tensor):
+        if self.mean == 0.0:
+            return tensor * self.mean
+        else:
+            return tensor + torch.randn(tensor.size()) * self.std + self.mean
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class AddExponentialNoise(object):
+    def __init__(self, a=0.0, b=0):
+        if a == 0.0:
+            self.std = self.mean = 0.0  # define std too so __repr__ cannot fail
+        else:
+            self.std = 1 / (2 * a)
+            self.mean = 1 / a
+
+    def __call__(self, tensor):
+        if self.mean == 0.0:
+            return tensor * self.mean
+        else:
+            return tensor + torch.randn(tensor.size()) * self.std + self.mean
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class AddUniformNoise(object):
+    def __init__(self, a=0.0, b=0.0):
+        if a == 0.0:
+            self.std = 0.0
+            self.mean = 0.0
+        else:
+            self.std = (b - a) / np.sqrt(12)  # standard deviation of U(a, b)
+            self.mean = (b + a) / 2
+
+    def __call__(self, tensor):
+        if self.mean == 0.0:
+            return tensor * self.mean
+        else:
+            return tensor + (torch.randn(tensor.size()) * self.std + self.mean)
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(mean={0}, std={1})".format(
+            self.mean, self.std
+        )
+
+
+class AddImpulseNoise(object):
+    def __init__(self, a=0.0, b=0):
+        self.value = a
+
+    def __call__(self, tensor):
+        draw = random.gauss(0, 1)  # draw once so the sign test is consistent
+        if draw > 0:
+            return tensor * self.value
+        elif draw < 0:
+            return tensor * (-1 * self.value)
+        return tensor * 0.0
+
+    def __repr__(self):
+        return self.__class__.__name__ + "(a={0})".format(self.value)
+
+
+class CustomDataset(Dataset):
+    def __init__(self, data, labels, transform=None):
+        self.data = data
+        self.labels = labels
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        sample = self.data[idx]
+        label = self.labels[idx]
+        if self.transform:
+            sample = self.transform(sample)
+
+        return sample, label
+
+
+def extract_classes(dataset, classes):
+    idx = torch.zeros_like(dataset.targets, dtype=torch.bool)
+    for target in classes:
+        idx = idx | (dataset.targets == target)
+
+    data, targets = dataset.data[idx], dataset.targets[idx]
+    return data, targets
+
+
+def getDataset(dataset, noise=None, mean=0.0, std=0.0):
+    """Return train/test sets plus input channels and class count for `dataset`"""
+    noise_type = None
+    if noise is None:
+        # print("No noise added")
+        noise_type = AddNoNoise
+    elif noise == "gaussian":
+        noise_type = AddGaussianNoise
+    elif noise == "raleigh":
+        noise_type = AddRaleighNoise
+    elif noise == "erlang":
+        noise_type = AddErlangNoise
+    elif noise == "exponential":
+        noise_type = AddExponentialNoise
+    elif noise == "uniform":
+        noise_type = AddUniformNoise
+    elif noise == "impulse":
+        noise_type = AddImpulseNoise
+
+    print(f"{noise_type.__name__} noise added")
+    transform_split_mnist = transforms.Compose(
+        [
+            transforms.ToPILImage(),
+            transforms.Resize((32, 32)),
+            transforms.ToTensor(),
+            noise_type(mean, std),
+        ]
+    )
+
+    transform_mnist = transforms.Compose(
+        [
+            transforms.Resize((32, 32)),
+            transforms.ToTensor(),
+            noise_type(mean, std),
+        ]
+    )
+
+    transform_cifar = transforms.Compose(
+        [
+            transforms.Resize((32, 32)),
+            transforms.RandomHorizontalFlip(),
+            transforms.ToTensor(),
+            noise_type(mean, std),
+        ]
+    )
+
+    if dataset == "CIFAR10":
+        trainset = torchvision.datasets.CIFAR10(
+            root="./data", train=True, download=True, transform=transform_cifar
+        )
+        testset = torchvision.datasets.CIFAR10(
+            root="./data", train=False, download=True, transform=transform_cifar
+        )
+        num_classes = 10
inputs = 3 + + elif dataset == "CIFAR100": + trainset = torchvision.datasets.CIFAR100( + root="./data", train=True, download=True, transform=transform_cifar + ) + testset = torchvision.datasets.CIFAR100( + root="./data", train=False, download=True, transform=transform_cifar + ) + num_classes = 100 + inputs = 3 + + elif dataset == "MNIST": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + num_classes = 10 + inputs = 1 + + elif dataset == "SplitMNIST-2.1": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [0, 1, 2, 3, 4]) + test_data, test_targets = extract_classes(testset, [0, 1, 2, 3, 4]) + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 5 + inputs = 1 + + elif dataset == "SplitMNIST-2.2": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [5, 6, 7, 8, 9]) + test_data, test_targets = extract_classes(testset, [5, 6, 7, 8, 9]) + train_targets -= 5 # Mapping target 5-9 to 0-4 + test_targets -= 5 # Hence, add 5 after prediction + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 5 + inputs = 1 + + elif dataset == "SplitMNIST-5.1": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [0, 1]) + test_data, test_targets = extract_classes(testset, [0, 1]) + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 2 + inputs = 1 + + elif dataset == "SplitMNIST-5.2": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [2, 3]) + test_data, test_targets = extract_classes(testset, [2, 3]) + train_targets -= 2 # Mapping target 2-3 to 0-1 + test_targets -= 2 # Hence, add 2 after prediction + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 2 + inputs = 1 + + elif dataset == "SplitMNIST-5.3": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + 
+ train_data, train_targets = extract_classes(trainset, [4, 5]) + test_data, test_targets = extract_classes(testset, [4, 5]) + train_targets -= 4 # Mapping target 4-5 to 0-1 + test_targets -= 4 # Hence, add 4 after prediction + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 2 + inputs = 1 + + elif dataset == "SplitMNIST-5.4": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [6, 7]) + test_data, test_targets = extract_classes(testset, [6, 7]) + train_targets -= 6 # Mapping target 6-7 to 0-1 + test_targets -= 6 # Hence, add 6 after prediction + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 2 + inputs = 1 + + elif dataset == "SplitMNIST-5.5": + trainset = torchvision.datasets.MNIST( + root="./data", train=True, download=True, transform=transform_mnist + ) + testset = torchvision.datasets.MNIST( + root="./data", train=False, download=True, transform=transform_mnist + ) + + train_data, train_targets = extract_classes(trainset, [8, 9]) + test_data, test_targets = extract_classes(testset, [8, 9]) + train_targets -= 8 # Mapping target 8-9 to 0-1 + test_targets -= 8 # Hence, add 8 after prediction + + trainset = CustomDataset( + train_data, train_targets, transform=transform_split_mnist + ) + testset = CustomDataset( + test_data, test_targets, transform=transform_split_mnist + ) + num_classes = 2 + inputs = 1 + + return trainset, testset, inputs, num_classes + + +def getDataloader(trainset, testset, valid_size, batch_size, num_workers): + num_train = len(trainset) + indices = list(range(num_train)) + np.random.shuffle(indices) + split = int(np.floor(valid_size * num_train)) + train_idx, valid_idx = indices[split:], indices[:split] + + train_sampler = SubsetRandomSampler(train_idx) + valid_sampler = SubsetRandomSampler(valid_idx) + + train_loader = torch.utils.data.DataLoader( + trainset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers + ) + valid_loader = torch.utils.data.DataLoader( + trainset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers + ) + test_loader = torch.utils.data.DataLoader( + testset, batch_size=batch_size, num_workers=num_workers + ) + + return train_loader, valid_loader, test_loader diff --git a/gpu_power_func.py b/gpu_power_func.py index ef7fc55..65528c5 100755 --- a/gpu_power_func.py +++ b/gpu_power_func.py @@ -53,6 +53,5 @@ def total_watt_consumed(pickle_name): break x = np.array(x) x = x[:, 0] - y = [float(re.findall("\d+.\d+", xi)[0]) for xi in x] - return sum(y) - + # y = [float(re.findall(r"\d+.\d+", xi)[0]) for xi in x] + return sum(x) diff --git a/layers/__init__.py b/layers/__init__.py new file mode 100755 index 0000000..f226595 --- /dev/null +++ b/layers/__init__.py @@ -0,0 +1,5 @@ +from .BBB.BBBConv import BBBConv2d as BBB_Conv2d +from .BBB.BBBLinear import BBBLinear as BBB_Linear +from .BBB_LRT.BBBConv import BBBConv2d as BBB_LRT_Conv2d +from .BBB_LRT.BBBLinear import BBBLinear as BBB_LRT_Linear +from .misc import FlattenLayer, ModuleWrapper diff --git a/main_bayesian.py b/main_bayesian.py 
index 4b86484..4009fc6 100755 --- a/main_bayesian.py +++ b/main_bayesian.py @@ -15,7 +15,7 @@ import utils from models.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC from models.BayesianModels.BayesianAlexNet import BBBAlexNet from models.BayesianModels.BayesianLeNet import BBBLeNet -from stopping_crit import accuracy_bound, e_stop, energy_bound +from stopping_crit import accuracy_bound, e_stop, efficiency_stop, energy_bound with open("configuration.pkl", "rb") as file: while True: @@ -30,7 +30,6 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") def getModel(net_type, inputs, outputs, priors, layer_type, activation_type): - print(net_type) if net_type == "lenet": return BBBLeNet( outputs, @@ -92,9 +91,7 @@ def train_model( return training_loss / len(trainloader), np.mean(accs), np.mean(kl_list) -def validate_model( - net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=None, num_epochs=None -): +def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=None, num_epochs=None): """Calculate ensemble accuracy and NLL Loss""" net.train() valid_loss = 0.0 @@ -119,7 +116,10 @@ def validate_model( def run(dataset, net_type): - + # Noise applied to dataset + noise_type = cfg["noise_type"] + mean = 0.5 + std = 0.5 # Hyper Parameter settings layer_type = cfg["model"]["layer_type"] activation_type = cfg["model"]["activation_type"] @@ -134,17 +134,13 @@ def run(dataset, net_type): batch_size = cfg["model"]["batch_size"] beta_type = cfg["model"]["beta_type"] - trainset, testset, inputs, outputs = data.getDataset(dataset) + trainset, testset, inputs, outputs = data.getDataset(dataset, noise_type, mean=mean, std=std) train_loader, valid_loader, test_loader = data.getDataloader( trainset, testset, valid_size, batch_size, num_workers ) - net = getModel(net_type, inputs, outputs, priors, layer_type, activation_type).to( - device - ) + net = getModel(net_type, inputs, outputs, priors, layer_type, activation_type).to(device) ckpt_dir = f"checkpoints/{dataset}/bayesian" - ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}\ - _{activation_type}_{cfg["model"]["size"]}.pt' if not os.path.exists(ckpt_dir): os.makedirs(ckpt_dir, exist_ok=True) @@ -191,6 +187,10 @@ def run(dataset, net_type): ) ) + ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}_{activation_type}_{cfg["model"]["size"]}_epoch_{epoch}_noise_{noise_type}.pt' + if sav == 1: + torch.save(net.state_dict(), ckpt_name) + if stp == 2: # print("Using early stopping") if e_stop(early_stop, valid_acc, epoch + 1, 2, cfg["model"]["sens"]) == 1: @@ -200,18 +200,22 @@ def run(dataset, net_type): if energy_bound(cfg["model"]["energy_thrs"]) == 1: break elif stp == 4: - # print("Using accuracy bound") - if accuracy_bound(train_acc, cfg.acc_thrs) == 1: + if dataset == "MNIST": + # print("Using accuracy bound") + if accuracy_bound(train_acc, 0.99) == 1: + break + else: + # print("Using accuracy bound") + if accuracy_bound(train_acc, 0.50) == 1: + break + elif stp == 5: + # print("Using efficiency stoping") + if efficiency_stop(net, train_acc, batch_size, 0.002) == 1: break else: - print("Training for {} epochs".format(cfg["model"]["n_epochs"])) + print(f"Training for {cfg['model']['n_epochs']} epochs") - if sav == 1: - # save model when finished - if epoch == cfg.n_epochs - 1: - torch.save(net.state_dict(), ckpt_name) - - with open("bayes_exp_data_" + str(cfg["model"]["size"]) + ".pkl", "wb") as f: + with open("bayes_exp_data_" + str(cfg["model"]["size"]) + 
f"_{dataset}" + ".pkl", "wb") as f: pickle.dump(train_data, f) diff --git a/main_frequentist.py b/main_frequentist.py index e489f03..68b58f1 100755 --- a/main_frequentist.py +++ b/main_frequentist.py @@ -14,7 +14,7 @@ import metrics from models.NonBayesianModels.AlexNet import AlexNet from models.NonBayesianModels.LeNet import LeNet from models.NonBayesianModels.ThreeConvThreeFC import ThreeConvThreeFC -from stopping_crit import accuracy_bound, e_stop, energy_bound +from stopping_crit import accuracy_bound, e_stop, efficiency_stop, energy_bound with open("configuration.pkl", "rb") as file: while True: @@ -71,7 +71,10 @@ def validate_model(net, criterion, valid_loader): def run(dataset, net_type): - + # Noise on dataset + noise_type = None + mean = 0.5 + std = 1 # Hyper Parameter settings n_epochs = cfg["model"]["n_epochs"] lr = cfg["model"]["lr"] @@ -79,15 +82,13 @@ def run(dataset, net_type): valid_size = cfg["model"]["valid_size"] batch_size = cfg["model"]["batch_size"] - trainset, testset, inputs, outputs = data.getDataset(dataset) + trainset, testset, inputs, outputs = data.getDataset(dataset, noise_type, mean=mean, std=std) train_loader, valid_loader, test_loader = data.getDataloader( trainset, testset, valid_size, batch_size, num_workers ) net = getModel(net_type, inputs, outputs).to(device) ckpt_dir = f"checkpoints/{dataset}/frequentist" - ckpt_name = f'checkpoints/{dataset}/frequentist/model\ - _{net_type}_{cfg["model"]["size"]}.pt' if not os.path.exists(ckpt_dir): os.makedirs(ckpt_dir, exist_ok=True) @@ -120,6 +121,10 @@ def run(dataset, net_type): ) ) + ckpt_name = f'checkpoints/{dataset}/frequentist/model_{net_type}_{cfg["model"]["size"]}_epoch_{epoch}_noise_{noise_type}.pt' + if sav == 1: + torch.save(net.state_dict(), ckpt_name) + if stp == 2: # print("Using early stopping") if e_stop(early_stop, valid_acc, epoch, 2, cfg["model"]["sens"]) == 1: @@ -129,18 +134,22 @@ def run(dataset, net_type): if energy_bound(cfg["model"]["energy_thrs"]) == 1: break elif stp == 4: - # print('Using accuracy bound') - if accuracy_bound(train_acc, cfg["model"]["acc_thrs"]) == 1: + if dataset == "MNIST": + # print("Using accuracy bound") + if accuracy_bound(train_acc, 0.99) == 1: + break + else: + # print("Using accuracy bound") + if accuracy_bound(train_acc, 0.50) == 1: + break + elif stp == 5: + # print("Using efficiency bound") + if efficiency_stop(net, train_acc, batch_size, 0.25) == 1: break else: print("Training for {} epochs".format(cfg["model"]["n_epochs"])) - if sav == 1: - # save model when finished - if epoch <= n_epochs: - torch.save(net.state_dict(), ckpt_name) - - with open("freq_exp_data_" + str(cfg["model"]["size"]) + ".pkl", "wb") as f: + with open("freq_exp_data_" + str(cfg["model"]["size"]) + f"_{dataset}" + ".pkl", "wb") as f: pickle.dump(train_data, f) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a02dabf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[tool.pylint.'FORMAT'] +max-line-length = 110 + +[tool.pylint.'MESSAGES CONTROL'] +disable = ["missing-module-docstring", "missing-function-docstring", "import-error"] + +[tool.black] +line-length = 110 + diff --git a/read_pickle.py b/read_pickle.py index 4dff651..f099772 100755 --- a/read_pickle.py +++ b/read_pickle.py @@ -1,15 +1,17 @@ import pickle gpu_data = [] -with (open("bayesian_wattdata_3.pkl", "rb")) as openfile: +with open("configuration.pkl", "rb") as openfile: while True: try: gpu_data = pickle.load(openfile) except EOFError: break -#exp_data = [] -#with 
(open("bayes_exp_data_6.pkl", "rb")) as openfile: + +print(gpu_data) +# exp_data = [] +# with (open("bayes_exp_data_6.pkl", "rb")) as openfile: # while True: # try: # exp_data = pickle.load(openfile) diff --git a/run_service.py b/run_service.py index 2a946a7..fa7927d 100755 --- a/run_service.py +++ b/run_service.py @@ -1,9 +1,12 @@ -import psutil import pickle -import arguments -from time import sleep -#from pathlib import Path + +# from pathlib import Path import subprocess as sub +from time import sleep + +import psutil + +import arguments from arguments import makeArguments @@ -15,30 +18,36 @@ def kill(proc_pid): cfg = { - "model": {"net_type": None, "type": None, "size": None, "layer_type": - "lrt", "activation_type": "softplus", "priors": { - 'prior_mu': 0, - 'prior_sigma': 0.1, - 'posterior_mu_initial': (0, 0.1), # (mean,std) normal_ - 'posterior_rho_initial': (-5, 0.1), # (mean,std) normal_ - }, - "n_epochs": 100, - "sens": 1e-9, - "energy_thrs": 10000, - "acc_thrs": 0.99, - "lr": 0.001, - "num_workers": 4, - "valid_size": 0.2, - "batch_size": 256, - "train_ens": 1, - "valid_ens": 1, - "beta_type": 0.1, # 'Blundell','Standard',etc. - # Use float for const value - }, - "data": None, - "stopping_crit": None, - "save": None, - "pickle_path": None, + "model": { + "net_type": None, + "type": None, + "size": None, + "layer_type": "lrt", + "activation_type": "softplus", + "priors": { + "prior_mu": 0, + "prior_sigma": 0.1, + "posterior_mu_initial": (0, 0.1), # (mean,std) normal_ + "posterior_rho_initial": (-5, 0.1), # (mean,std) normal_ + }, + "n_epochs": 100, + "sens": 1e-9, + "energy_thrs": 100000, + "acc_thrs": 0.99, + "lr": 0.001, + "num_workers": 4, + "valid_size": 0.2, + "batch_size": 256, + "train_ens": 1, + "valid_ens": 1, + "beta_type": 0.1, # 'Blundell','Standard',etc. 
+ # Use float for const value + }, + "data": None, + "noise_type": None, + "stopping_crit": None, + "save": None, + "pickle_path": None, } args = makeArguments(arguments.all_args) @@ -47,10 +56,10 @@ check = list(args.values()) if all(v is None for v in check): raise Exception("One argument required") elif None in check: - if args['f'] is not None: + if args["f"] is not None: cmd = ["python", "main_frequentist.py"] cfg["model"]["type"] = "freq" - elif args['b'] is not None: + elif args["b"] is not None: cmd = ["python", "main_bayesian.py"] cfg["model"]["type"] = "bayes" else: @@ -61,26 +70,30 @@ wide = args["f"] or args["b"] cfg["model"]["size"] = wide cfg["data"] = args["dataset"] +cfg["noise_type"] = args["noise_type"] cfg["model"]["net_type"] = args["net_type"] -if args['EarlyStopping']: +if args["EarlyStopping"]: cfg["stopping_crit"] = 2 -elif args['EnergyBound']: +elif args["EnergyBound"]: cfg["stopping_crit"] = 3 -elif args['AccuracyBound']: +elif args["AccuracyBound"]: cfg["stopping_crit"] = 4 +elif args["EfficiencyStopping"]: + cfg["stopping_crit"] = 5 else: cfg["stopping_crit"] = 1 -if args['Save']: +if args["Save"]: cfg["save"] = 1 else: cfg["save"] = 0 -cfg["pickle_path"] = "{}_wattdata_{}.pkl".format(cfg["model"]["type"], - cfg["model"]["size"]) +cfg["pickle_path"] = ( + f"{cfg['model']['type']}_wattdata_{cfg['model']['size']}_{cfg['data']}.pkl" +) with open("configuration.pkl", "wb") as f: pickle.dump(cfg, f) @@ -94,34 +107,35 @@ cpu_watt = "cpu_watt.sh" ram = "mem_free.sh" gpu = "radeontop.sh" -#path_cpu_watt = Path(cpu_watt) -#path_ram = Path(ram) -#path_gpu = Path(gpu) +# path_cpu_watt = Path(cpu_watt) +# path_ram = Path(ram) +# path_gpu = Path(gpu) -#path_cpu_watt = str(Path(cpu_watt).absolute()) + '/' + cpu_watt -#path_ram = str(Path(ram).absolute()) + '/' + ram -#path_gpu = str(Path(gpu).absolute()) + '/' + gpu +# path_cpu_watt = str(Path(cpu_watt).absolute()) + '/' + cpu_watt +# path_ram = str(Path(ram).absolute()) + '/' + ram +# path_gpu = str(Path(gpu).absolute()) + '/' + gpu if cmd[1] == "main_frequentist.py": - cmd2 = ['./'+cpu_watt, "freq_{}_cpu_watts".format(wide)] - cmd3 = ['./'+ram, "freq_{}_ram_use".format(wide)] - cmd4 = ['./'+gpu, "freq_{}_flop_app".format(wide)] + cmd2 = ["./" + cpu_watt, f"freq_{wide}_cpu_watts_{cfg['data']}"] + cmd3 = ["./" + ram, f"freq_{wide}_ram_use_{cfg['data']}"] + cmd4 = ["./" + gpu, f"freq_{wide}_flop_app_{cfg['data']}"] elif cmd[1] == "main_bayesian.py": - cmd2 = ['./'+cpu_watt, "bayes_{}_cpu_watts".format(wide)] - cmd3 = ['./'+ram, "bayes_{}_ram_use".format(wide)] - cmd4 = ['./'+gpu, "bayes_{}_flop_app".format(wide)] + cmd2 = ["./" + cpu_watt, f"bayes_{wide}_cpu_watts_{cfg['data']}"] + cmd3 = ["./" + ram, f"bayes_{wide}_ram_use_{cfg['data']}"] + cmd4 = ["./" + gpu, f"bayes_{wide}_flop_app_{cfg['data']}"] -path = sub.check_output(['pwd']) +path = sub.check_output(["pwd"]) path = path.decode() -path = path.replace('\n', '') +path = path.replace("\n", "") -startWattCounter = 'python ' + path + '/amd_sample_draw.py' +startWattCounter = "python " + path + "/amd_sample_draw.py" p1 = sub.Popen(cmd) -p2 = sub.Popen(startWattCounter.split(), stdin=sub.PIPE, stdout=sub.PIPE, - stderr=sub.PIPE) +p2 = sub.Popen( + startWattCounter.split(), stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE +) p3 = sub.Popen(cmd2, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE) p4 = sub.Popen(cmd3, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE) p5 = sub.Popen(cmd4, stdin=sub.PIPE, stdout=sub.PIPE, stderr=sub.PIPE) diff --git a/stopping_crit.py 
b/stopping_crit.py
index 77779b5..6614522 100755
--- a/stopping_crit.py
+++ b/stopping_crit.py
@@ -1,3 +1,4 @@
+# import math
 import pickle
 from time import sleep
 
@@ -11,28 +12,69 @@ with open("configuration.pkl", "rb") as file:
             break
 
 
-def non_decreasing(L):
-    return all(x <= y for x, y in zip(L, L[1:]))
+def non_decreasing(lst: list):
+    """
+    Check that a list is non-decreasing
+    """
+    return all(x <= y for x, y in zip(lst, lst[1:]))
 
 
-def non_increasing(L):
-    return all(x >= y for x, y in zip(L, L[1:]))
+def non_increasing(lst):
+    """
+    Check that a list is non-increasing
+    """
+    return all(x >= y for x, y in zip(lst, lst[1:]))
 
 
-def monotonic(L):
-    return non_decreasing(L) or non_increasing(L)
+def monotonic(lst):
+    """
+    Check that a list is monotonic
+    """
+    return non_decreasing(lst) or non_increasing(lst)
 
 
-def strictly_increasing(L):
-    return all(x < y for x, y in zip(L, L[1:]))
+def strictly_increasing(lst):
+    """
+    Check that a list is strictly increasing
+    """
+    return all(x < y for x, y in zip(lst, lst[1:]))
 
 
-def strictly_decreasing(L):
-    return all(x > y for x, y in zip(L, L[1:]))
+def strictly_decreasing(lst):
+    """
+    Check that a list is strictly decreasing
+    """
+    return all(x > y for x, y in zip(lst, lst[1:]))
 
 
-def strictly_monotonic(L):
-    return strictly_increasing(L) or strictly_decreasing(L)
+def strictly_monotonic(lst):
+    """
+    Check that a list is strictly monotonic
+    """
+    return strictly_increasing(lst) or strictly_decreasing(lst)
+
+
+def count_parameters(model):
+    """Count the model's trainable parameters"""
+    return sum(p.numel() for p in model.parameters() if p.requires_grad)
+
+
+def efficiency_stop(model, accuracy, batch, sensitivity=0.001):
+    """
+    Stop once a sufficient amount of generalization has taken place,
+    taking the efficiency of the model into account
+    """
+    try:
+        energy = total_watt_consumed(cfg["pickle_path"])
+    except Exception:
+        sleep(3)
+        energy = total_watt_consumed(cfg["pickle_path"])
+    efficiency = accuracy / energy
+    print(f"Current efficiency: {efficiency}")
+    no_parameters = count_parameters(model)
+    if (efficiency * no_parameters / (batch / 2) >= sensitivity) and (accuracy >= 0.5):
+        return 1
+    return 0
 
 
 def e_stop(
@@ -42,6 +84,9 @@
     patience: int = 4,
     sensitivity: float = 1e-9,
 ):
+    """
+    This function stops training early
+    """
     early_stopping.append(train_acc)
     if patience in (0, 1):
         print("Stopping Early")
@@ -69,7 +114,7 @@ def energy_bound(threshold: float = 100000.0):
     except Exception as e:
         sleep(3)
         energy = total_watt_consumed(cfg["pickle_path"])
-    print("Energy used: {}".format(energy))
+    print(f"Energy used: {energy}")
    if energy > threshold:
         print("Energy bound achieved")
         return 1
@@ -77,6 +122,7 @@
 
 
 def accuracy_bound(train_acc: float, threshold: float = 0.99):
+    """Stop training once the specified training accuracy is reached"""
     if train_acc >= threshold:
         print("Accuracy bound achieved")
         return 1
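
Usage sketch for the new data pipeline (illustrative only: the noise name, moments,
and loader settings below are example values, not defaults taken from this patch):

    # Build noisy CIFAR10 loaders with the helpers added in data/data.py.
    from data import getDataloader, getDataset

    trainset, testset, inputs, outputs = getDataset(
        "CIFAR10", noise="gaussian", mean=0.5, std=0.5
    )
    train_loader, valid_loader, test_loader = getDataloader(
        trainset, testset, valid_size=0.2, batch_size=256, num_workers=4
    )
    images, labels = next(iter(train_loader))
    print(images.shape)  # torch.Size([256, 3, 32, 32])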
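
The additive-noise transforms can also be sanity-checked in isolation; a zero tensor
pushed through AddGaussianNoise should come back with roughly the requested moments:

    import torch
    from data.data import AddGaussianNoise

    noise = AddGaussianNoise(mean=0.5, std=0.5)
    out = noise(torch.zeros(10_000))
    print(out.mean().item(), out.std().item())  # both close to 0.5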
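
On the new criterion (stp == 5): efficiency_stop() divides training accuracy by the
energy consumed so far and fires once efficiency * n_parameters / (batch / 2) reaches
the sensitivity, provided accuracy is at least 0.5. A worked sketch with made-up
numbers (in the patch the energy reading comes from total_watt_consumed() and the
Bayesian run passes sensitivity 0.002):

    accuracy = 0.62        # hypothetical training accuracy
    energy = 45_000.0      # hypothetical cumulative power reading
    n_parameters = 62_006  # roughly LeNet-sized
    batch = 256
    sensitivity = 0.002

    efficiency = accuracy / energy
    stop = efficiency * n_parameters / (batch / 2) >= sensitivity and accuracy >= 0.5
    print(stop)  # True: 1.38e-05 * 62006 / 128 is about 0.0067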
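
For reference, the stopping_crit codes that run_service.py writes into
configuration.pkl and that both training scripts branch on:

    STOPPING_CRIT = {
        1: "default: train for the full n_epochs",
        2: "--EarlyStopping: e_stop()",
        3: "--EnergyBound: energy_bound()",
        4: "--AccuracyBound: accuracy_bound(), 0.99 on MNIST and 0.50 otherwise",
        5: "--EfficiencyStopping: efficiency_stop(), added by this patch",
    }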
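
One caveat: total_watt_consumed() in gpu_power_func.py now sums the first column of
the pickled array directly, which assumes the readings are already numeric; the
commented-out regex suggests older pickles stored strings. A tolerant variant could
look like the following (hypothetical helper, not part of the patch):

    import re

    def watts_from_rows(rows):
        """Sum power readings that may mix floats and strings."""
        total = 0.0
        for value in rows:
            if isinstance(value, str):
                match = re.search(r"\d+(?:\.\d+)?", value)
                value = float(match.group()) if match else 0.0
            total += float(value)
        return total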