Update for 04-16-22

This commit is contained in:
Eduardo Cueto-Mendoza 2022-04-16 13:20:44 +01:00
commit 42e4594e86
32 changed files with 2458 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
__pycache__/
checkpoints/
data/
data_budget/
experiment-power-draw/
**/__pycache__/
**/__init__.py
**/**/__pycache__/
**/**/__init__.py

8
Mixtures/config_mixtures.py Executable file
View File

@ -0,0 +1,8 @@
############### Configuration file for Training of SplitMNIST and Mixtures ###############
n_epochs = 5
lr_start = 0.001
num_workers = 4
valid_size = 0.2
batch_size = 256
train_ens = 15
valid_ens = 10

305
Mixtures/gmm.py Executable file
View File

@ -0,0 +1,305 @@
# Courtesy of https://github.com/ldeecke/gmm-torch
import torch
import numpy as np
from math import pi
class GaussianMixture(torch.nn.Module):
"""
Fits a mixture of k=1,..,K Gaussians to the input data. Input tensors are expected to be flat with dimensions (n: number of samples, d: number of features).
The model then extends them to (n, k: number of components, d).
The model parametrization (mu, sigma) is stored as (1, k, d), and probabilities are shaped (n, k, 1) if they relate to an individual sample, or (1, k, 1) if they assign membership probabilities to one of the mixture components.
"""
def __init__(self, n_components, n_features, mu_init=None, var_init=None, eps=1.e-6):
"""
Initializes the model and brings all tensors into their required shape. The class expects data to be fed as a flat tensor in (n, d). The class owns:
x: torch.Tensor (n, k, d)
mu: torch.Tensor (1, k, d)
var: torch.Tensor (1, k, d)
pi: torch.Tensor (1, k, 1)
eps: float
n_components: int
n_features: int
score: float
args:
n_components: int
n_features: int
mu_init: torch.Tensor (1, k, d)
var_init: torch.Tensor (1, k, d)
eps: float
"""
super(GaussianMixture, self).__init__()
self.eps = eps
self.n_components = n_components
self.n_features = n_features
self.log_likelihood = -np.inf
self.mu_init = mu_init
self.var_init = var_init
self._init_params()
def _init_params(self):
if self.mu_init is not None:
assert self.mu_init.size() == (1, self.n_components, self.n_features), "Input mu_init does not have required tensor dimensions (1, %i, %i)" % (self.n_components, self.n_features)
# (1, k, d)
self.mu = torch.nn.Parameter(self.mu_init, requires_grad=False)
else:
self.mu = torch.nn.Parameter(torch.randn(1, self.n_components, self.n_features), requires_grad=False)
if self.var_init is not None:
assert self.var_init.size() == (1, self.n_components, self.n_features), "Input var_init does not have required tensor dimensions (1, %i, %i)" % (self.n_components, self.n_features)
# (1, k, d)
self.var = torch.nn.Parameter(self.var_init, requires_grad=False)
else:
self.var = torch.nn.Parameter(torch.ones(1, self.n_components, self.n_features), requires_grad=False)
# (1, k, 1)
self.pi = torch.nn.Parameter(torch.Tensor(1, self.n_components, 1), requires_grad=False).fill_(1./self.n_components)
self.params_fitted = False
def bic(self, x):
"""
Bayesian information criterion for samples x.
args:
x: torch.Tensor (n, d) or (n, k, d)
returns:
bic: float
"""
n = x.shape[0]
if len(x.size()) == 2:
# (n, d) --> (n, k, d)
x = x.unsqueeze(1).expand(n, self.n_components, x.size(1))
bic = -2. * self.__score(self.pi, self.__p_k(x, self.mu, self.var), sum_data=True) * n + self.n_components * np.log(n)
return bic
def fit(self, x, warm_start=False, delta=1e-8, n_iter=1000):
"""
Public method that fits data to the model.
args:
n_iter: int
delta: float
"""
if not warm_start and self.params_fitted:
self._init_params()
if len(x.size()) == 2:
# (n, d) --> (n, k, d)
x = x.unsqueeze(1).expand(x.size(0), self.n_components, x.size(1))
i = 0
j = np.inf
while (i <= n_iter) and (j >= delta):
log_likelihood_old = self.log_likelihood
mu_old = self.mu
var_old = self.var
self.__em(x)
self.log_likelihood = self.__score(self.pi, self.__p_k(x, self.mu, self.var))
if (self.log_likelihood.abs() == float("Inf")) or (self.log_likelihood == float("nan")):
# when the log-likelihood assumes inane values, reinitialize model
self.__init__(self.n_components, self.n_features)
i += 1
j = self.log_likelihood - log_likelihood_old
if j <= delta:
# when the score decreases, revert to old parameters
self.__update_mu(mu_old)
self.__update_var(var_old)
self.params_fitted = True
def predict(self, x, probs=False):
"""
Assigns input data to one of the mixture components by evaluating the likelihood under each. If probs=True returns normalized probabilities of class membership instead.
args:
x: torch.Tensor (n, d) or (n, k, d)
probs: bool
returns:
y: torch.LongTensor (n)
"""
if len(x.size()) == 2:
# (n, d) --> (n, k, d)
x = x.unsqueeze(1).expand(x.size(0), self.n_components, x.size(1))
p_k = self.__p_k(x, self.mu, self.var)
if probs:
return p_k / (p_k.sum(1, keepdim=True) + self.eps)
else:
_, predictions = torch.max(p_k, 1)
return torch.squeeze(predictions).type(torch.LongTensor)
def predict_proba(self, x):
"""
Returns normalized probabilities of class membership.
args:
x: torch.Tensor (n, d) or (n, k, d)
returns:
y: torch.LongTensor (n)
"""
return self.predict(x, probs=True)
def score_samples(self, x):
"""
Computes log-likelihood of data (x) under the current model.
args:
x: torch.Tensor (n, d) or (n, k, d)
returns:
score: torch.LongTensor (n)
"""
if len(x.size()) == 2:
# (n, d) --> (n, k, d)
x = x.unsqueeze(1).expand(x.size(0), self.n_components, x.size(1))
score = self.__score(self.pi, self.__p_k(x, self.mu, self.var), sum_data=False)
return score
def __p_k(self, x, mu, var):
"""
Returns a tensor with dimensions (n, k, 1) indicating the likelihood of data belonging to the k-th Gaussian.
args:
x: torch.Tensor (n, k, d)
mu: torch.Tensor (1, k, d)
var: torch.Tensor (1, k, d)
returns:
p_k: torch.Tensor (n, k, 1)
"""
# (1, k, d) --> (n, k, d)
mu = mu.expand(x.size(0), self.n_components, self.n_features)
var = var.expand(x.size(0), self.n_components, self.n_features)
# (n, k, d) --> (n, k, 1)
exponent = torch.exp(-.5 * torch.sum((x - mu) * (x - mu) / var, 2, keepdim=True))
# (n, k, d) --> (n, k, 1)
prefactor = torch.rsqrt(((2. * pi) ** self.n_features) * torch.prod(var, dim=2, keepdim=True) + self.eps)
return prefactor * exponent
def __e_step(self, pi, p_k):
"""
Computes weights that indicate the probabilistic belief that a data point was generated by one of the k mixture components. This is the so-called expectation step of the EM-algorithm.
args:
pi: torch.Tensor (1, k, 1)
p_k: torch.Tensor (n, k, 1)
returns:
weights: torch.Tensor (n, k, 1)
"""
weights = pi * p_k
return torch.div(weights, torch.sum(weights, 1, keepdim=True) + self.eps)
def __m_step(self, x, weights):
"""
Updates the model's parameters. This is the maximization step of the EM-algorithm.
args:
x: torch.Tensor (n, k, d)
weights: torch.Tensor (n, k, 1)
returns:
pi_new: torch.Tensor (1, k, 1)
mu_new: torch.Tensor (1, k, d)
var_new: torch.Tensor (1, k, d)
"""
# (n, k, 1) --> (1, k, 1)
n_k = torch.sum(weights, 0, keepdim=True)
pi_new = torch.div(n_k, torch.sum(n_k, 1, keepdim=True) + self.eps)
# (n, k, d) --> (1, k, d)
mu_new = torch.div(torch.sum(weights * x, 0, keepdim=True), n_k + self.eps)
# (n, k, d) --> (1, k, d)
var_new = torch.div(torch.sum(weights * (x - mu_new) * (x - mu_new), 0, keepdim=True), n_k + self.eps)
return pi_new, mu_new, var_new
def __em(self, x):
"""
Performs one iteration of the expectation-maximization algorithm by calling the respective subroutines.
args:
x: torch.Tensor (n, k, d)
"""
weights = self.__e_step(self.pi, self.__p_k(x, self.mu, self.var))
pi_new, mu_new, var_new = self.__m_step(x, weights)
self.__update_pi(pi_new)
self.__update_mu(mu_new)
self.__update_var(var_new)
def __score(self, pi, p_k, sum_data=True):
"""
Computes the log-likelihood of the data under the model.
args:
pi: torch.Tensor (1, k, 1)
p_k: torch.Tensor (n, k, 1)
"""
weights = pi * p_k
if sum_data:
return torch.sum(torch.log(torch.sum(weights, 1) + self.eps))
else:
return torch.log(torch.sum(weights, 1) + self.eps)
def __update_mu(self, mu):
"""
Updates mean to the provided value.
args:
mu: torch.FloatTensor
"""
assert mu.size() in [(self.n_components, self.n_features), (1, self.n_components, self.n_features)], "Input mu does not have required tensor dimensions (%i, %i) or (1, %i, %i)" % (self.n_components, self.n_features, self.n_components, self.n_features)
if mu.size() == (self.n_components, self.n_features):
self.mu = mu.unsqueeze(0)
elif mu.size() == (1, self.n_components, self.n_features):
self.mu.data = mu
def __update_var(self, var):
"""
Updates variance to the provided value.
args:
var: torch.FloatTensor
"""
assert var.size() in [(self.n_components, self.n_features), (1, self.n_components, self.n_features)], "Input var does not have required tensor dimensions (%i, %i) or (1, %i, %i)" % (self.n_components, self.n_features, self.n_components, self.n_features)
if var.size() == (self.n_components, self.n_features):
self.var = var.unsqueeze(0)
elif var.size() == (1, self.n_components, self.n_features):
self.var.data = var
def __update_pi(self, pi):
"""
Updates pi to the provided value.
args:
pi: torch.FloatTensor
"""
assert pi.size() in [(1, self.n_components, 1)], "Input pi does not have required tensor dimensions (%i, %i, %i)" % (1, self.n_components, 1)
self.pi.data = pi

117
Mixtures/main.py Executable file
View File

@ -0,0 +1,117 @@
import sys
sys.path.append('..')
import os
import torch
import numpy as np
from collections import OrderedDict
import gmm
import utils
import utils_mixture
import config_bayesian as cfg
def feedforward_and_save_mean_var(net, dataloader, task_no, num_ens=1):
cfg.mean_var_dir = "Mixtures/mean_vars/task-{}/".format(task_no)
if not os.path.exists(cfg.mean_var_dir):
os.makedirs(cfg.mean_var_dir, exist_ok=True)
cfg.record_mean_var = True
cfg.record_layers = None # All layers
cfg.record_now = True
cfg.curr_batch_no = 0 # Not required
cfg.curr_epoch_no = 0 # Not required
net.train() # To get distribution of mean and var
accs = []
for i, (inputs, labels) in enumerate(dataloader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
kl = 0.0
for j in range(num_ens):
net_out, _kl = net(inputs)
kl += _kl
outputs[:, :, j] = F.log_softmax(net_out, dim=1).data
log_outputs = utils.logmeanexp(outputs, dim=2)
accs.append(metrics.acc(log_outputs, labels))
return np.mean(accs)
def _get_ordered_layer_name(mean_var_path):
# Order files according to creation time
files = os.listdir(mean_var_path)
files = [os.path.join(mean_var_path, f) for f in files]
files.sort(key=os.path.getctime)
layer_names = [f.split('/')[-1].split('.')[0] for f in files]
return layer_names
def _get_layer_wise_mean_var_per_task(mean_var_path):
# Order files according to creation time
# To get the correct model architecture
files = os.listdir(mean_var_path)
files = [os.path.join(mean_var_path, f) for f in files]
files.sort(key=os.path.getctime)
layer_names = [f.split('/')[-1].split('.')[0] for f in files]
mean_var = OrderedDict()
for i in range(len(files)):
data = {}
mean, var = utils.load_mean_std_from_file(files[i])
mean, var = np.vstack(mean), np.vstack(var) # shape is (len(trainset), output shape)
data['mean'] = mean
data['var'] = var
data['mean.mu'] = np.mean(mean, axis=0)
data['var.mu'] = np.mean(var, axis=0)
data['mean.var'] = np.var(mean, axis=0)
data['var.var'] = np.var(var, axis=0)
mean_var[layer_names[i]] = data
return mean_var
def get_mean_vars_for_all_tasks(mean_var_dir):
all_tasks = {}
for task in os.listdir(mean_var_dir):
path_to_task = os.path.join(mean_var_dir, task)
mean_var_per_task = _get_layer_wise_mean_var_per_task(path_to_task)
all_tasks[task] = mean_var_per_task
return all_tasks
def fit_to_gmm(num_tasks, layer_name, data_type, all_tasks):
data = np.vstack([all_tasks[f'task-{i}'][layer_name][data_type] for i in range(1, num_tasks+1)])
data = torch.tensor(data).float()
#data_mu = torch.cat([torch.tensor(all_tasks[f'task-{i}'][layer_name][data_type+'.mu']).unsqueeze(0) for i in range(1, num_tasks+1)], dim=0).float().unsqueeze(0)
#data_var = torch.cat([torch.tensor(all_tasks[f'task-{i}'][layer_name][data_type+'.var']).unsqueeze(0) for i in range(1, num_tasks+1)], dim=0).float().unsqueeze(0)
model = gmm.GaussianMixture(n_components=num_tasks, n_features=np.prod(data.shape[1:]))#, mu_init=data_mu, var_init=data_var)
data = data[torch.randperm(data.size()[0])] # Shuffling of data
model.fit(data)
return model.predict(data)
def main():
num_tasks = 2
weights_dir = "checkpoints/MNIST/bayesian/splitted/2-tasks/"
loader_task1, loader_task2 = utils_mixture.get_splitmnist_dataloaders(num_tasks)
train_loader_task1 = loader_task1[0]
train_loader_task2 = loader_task2[0]
net_task1, net_task2 = utils_mixture.get_splitmnist_models(num_tasks, True, weights_dir)
net_task1.cuda()
net_task2.cuda()
print("Task-1 Accuracy:", feedforward_and_save_mean_var(net_task1, train_loader_task1, task_no=1))
print("Task-2 Accuracy:", feedforward_and_save_mean_var(net_task2, train_loader_task2, task_no=2))
mean_vars_all_tasks = get_mean_vars_for_all_tasks("Mixtures/mean_vars/")
if __name__ == '__main__':
all_tasks = get_mean_vars_for_all_tasks("Mixtures/mean_vars/")
y = fit_to_gmm(2, 'fc3', 'mean', all_tasks)
print("Cluster0", (1-y).sum())
print("Cluster1", y.sum())

171
Mixtures/mixture_experiment.py Executable file
View File

@ -0,0 +1,171 @@
import sys
sys.path.append('..')
import os
import datetime
import torch
import contextlib
from utils_mixture import *
from layers.BBBLinear import BBBLinear
@contextlib.contextmanager
def print_to_logfile(file):
# capture all outputs to a log file while still printing it
class Logger:
def __init__(self, file):
self.terminal = sys.stdout
self.log = file
def write(self, message):
self.terminal.write(message)
self.log.write(message)
def __getattr__(self, attr):
return getattr(self.terminal, attr)
logger = Logger(file)
_stdout = sys.stdout
sys.stdout = logger
try:
yield logger.log
finally:
sys.stdout = _stdout
def initiate_experiment(experiment):
def decorator(*args, **kwargs):
log_file_dir = "experiments/mixtures/"
log_file = log_file_dir + experiment.__name__ + ".txt"
if not os.path.exists(log_file):
os.makedirs(log_file_dir, exist_ok=True)
with print_to_logfile(open(log_file, 'a')):
print("Performing experiment:", experiment.__name__)
print("Date-Time:", datetime.datetime.now())
print("\n", end="")
print("Args:", args)
print("Kwargs:", kwargs)
print("\n", end="")
experiment(*args, **kwargs)
print("\n\n", end="")
return decorator
@initiate_experiment
def experiment_regular_prediction_bayesian(weights_dir=None, num_ens=10):
num_tasks = 2
weights_dir = "checkpoints/MNIST/bayesian/splitted/2-tasks/" if weights_dir is None else weights_dir
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, bayesian=True, pretrained=True, weights_dir=weights_dir)
net1.cuda()
net2.cuda()
print("Model-1, Task-1-Dataset=> Accuracy:", predict_regular(net1, loaders1[1], bayesian=True, num_ens=num_ens))
print("Model-2, Task-2-Dataset=> Accuracy:", predict_regular(net2, loaders2[1], bayesian=True, num_ens=num_ens))
@initiate_experiment
def experiment_regular_prediction_frequentist(weights_dir=None):
num_tasks = 2
weights_dir = "checkpoints/MNIST/frequentist/splitted/2-tasks/" if weights_dir is None else weights_dir
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, bayesian=False, pretrained=True, weights_dir=weights_dir)
net1.cuda()
net2.cuda()
print("Model-1, Task-1-Dataset=> Accuracy:", predict_regular(net1, loaders1[1], bayesian=False))
print("Model-2, Task-2-Dataset=> Accuracy:", predict_regular(net2, loaders2[1], bayesian=False))
@initiate_experiment
def experiment_simultaneous_without_mixture_model_with_uncertainty(uncertainty_type="epistemic_softmax", T=25, weights_dir=None):
num_tasks = 2
weights_dir = "checkpoints/MNIST/bayesian/splitted/2-tasks/" if weights_dir is None else weights_dir
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, True, True, weights_dir)
net1.cuda()
net2.cuda()
print("Both Models, Task-1-Dataset=> Accuracy: {:.3}\tModel-1-Preferred: {:.3}\tModel-2-Preferred: {:.3}\t" \
"Task-1-Dataset-Uncertainty: {:.3}\tTask-2-Dataset-Uncertainty: {:.3}".format(
*predict_using_uncertainty_separate_models(net1, net2, loaders1[1], uncertainty_type=uncertainty_type, T=T)))
print("Both Models, Task-2-Dataset=> Accuracy: {:.3}\tModel-1-Preferred: {:.3}\tModel-2-Preferred: {:.3}\t" \
"Task-1-Dataset-Uncertainty: {:.3}\tTask-2-Dataset-Uncertainty: {:.3}".format(
*predict_using_uncertainty_separate_models(net1, net2, loaders2[1], uncertainty_type=uncertainty_type, T=T)))
@initiate_experiment
def experiment_simultaneous_without_mixture_model_with_confidence(weights_dir=None):
num_tasks = 2
weights_dir = "checkpoints/MNIST/frequentist/splitted/2-tasks/" if weights_dir is None else weights_dir
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, False, True, weights_dir)
net1.cuda()
net2.cuda()
print("Both Models, Task-1-Dataset=> Accuracy: {:.3}\tModel-1-Preferred: {:.3}\tModel-2-Preferred: {:.3}".format(
*predict_using_confidence_separate_models(net1, net2, loaders1[1])))
print("Both Models, Task-2-Dataset=> Accuracy: {:.3}\tModel-1-Preferred: {:.3}\tModel-2-Preferred: {:.3}".format(
*predict_using_confidence_separate_models(net1, net2, loaders2[1])))
@initiate_experiment
def wip_experiment_average_weights_mixture_model():
num_tasks = 2
weights_dir = "checkpoints/MNIST/bayesian/splitted/2-tasks/"
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, True, weights_dir)
net1.cuda()
net2.cuda()
net_mix = get_mixture_model(num_tasks, weights_dir, include_last_layer=True)
net_mix.cuda()
print("Model-1, Loader-1:", calculate_accuracy(net1, loaders1[1]))
print("Model-2, Loader-2:", calculate_accuracy(net2, loaders2[1]))
print("Model-1, Loader-2:", calculate_accuracy(net1, loaders2[1]))
print("Model-2, Loader-1:", calculate_accuracy(net2, loaders1[1]))
print("Model-Mix, Loader-1:", calculate_accuracy(net_mix, loaders1[1]))
print("Model-Mix, Loader-2:", calculate_accuracy(net_mix, loaders2[1]))
@initiate_experiment
def wip_experiment_simultaneous_average_weights_mixture_model_with_uncertainty():
num_tasks = 2
weights_dir = "checkpoints/MNIST/bayesian/splitted/2-tasks/"
loaders1, loaders2 = get_splitmnist_dataloaders(num_tasks)
net1, net2 = get_splitmnist_models(num_tasks, True, weights_dir)
net1.cuda()
net2.cuda()
net_mix = get_mixture_model(num_tasks, weights_dir, include_last_layer=False)
net_mix.cuda()
# Creating 2 sets of last layer
fc3_1 = BBBLinear(84, 5, name='fc3_1') # hardcoded for lenet
weights_1 = torch.load(weights_dir + "model_lenet_2.1.pt")
fc3_1.W = torch.nn.Parameter(weights_1['fc3.W'])
fc3_1.log_alpha = torch.nn.Parameter(weights_1['fc3.log_alpha'])
fc3_2 = BBBLinear(84, 5, name='fc3_2') # hardcoded for lenet
weights_2 = torch.load(weights_dir + "model_lenet_2.2.pt")
fc3_2.W = torch.nn.Parameter(weights_2['fc3.W'])
fc3_2.log_alpha = torch.nn.Parameter(weights_2['fc3.log_alpha'])
fc3_1, fc3_2 = fc3_1.cuda(), fc3_2.cuda()
print("Model-1, Loader-1:", calculate_accuracy(net1, loaders1[1]))
print("Model-2, Loader-2:", calculate_accuracy(net2, loaders2[1]))
print("Model-Mix, Loader-1:", predict_using_epistemic_uncertainty_with_mixture_model(net_mix, fc3_1, fc3_2, loaders1[1]))
print("Model-Mix, Loader-2:", predict_using_epistemic_uncertainty_with_mixture_model(net_mix, fc3_1, fc3_2, loaders2[1]))
if __name__ == '__main__':
experiment_simultaneous_without_mixture_model_with_confidence()

50
Mixtures/temp_gmm.py Executable file
View File

@ -0,0 +1,50 @@
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import gmm
def create_synthetic_data(num_gaussians, num_features, num_samples, means, vars):
assert len(means[0]) == len(vars[0]) == num_features
samples = []
for g in range(num_gaussians):
loc = torch.tensor(means[g]).float()
covariance_matrix = torch.eye(num_features).float() * torch.tensor(vars[g]).float()
dist = torch.distributions.multivariate_normal.MultivariateNormal(
loc = loc, covariance_matrix=covariance_matrix)
for i in range(num_samples//num_gaussians):
sample = dist.sample()
samples.append(sample.unsqueeze(0))
samples = torch.cat(samples, axis=0)
return samples
def plot_data(data, y=None):
if y is not None:
for sample, target in zip(data, y):
if target==0:
plt.scatter(*sample, color='blue')
elif target==1:
plt.scatter(*sample, color='red')
elif target==2:
plt.scatter(*sample, color='green')
else:
for sample in data:
plt.scatter(*sample, color='black')
plt.show(block=False)
plt.pause(2)
plt.close()
means = [[1, 4], [5, 5], [2, -1]] # list of task's means(which is mean of each feature)
vars = [[0.1, 0.1], [0.05, 0.4], [0.5, 0.2]] # list of task's vars(which is var of each feature)
data = create_synthetic_data(3, 2, 600, means, vars) # shape: (total_samples, num_features)
plot_data(data)
model = gmm.GaussianMixture(3, 2) # (num_gaussians, num_features)
model.fit(data)
y = model.predict(data)
plot_data(data, y)

83
Mixtures/train_splitted.py Executable file
View File

@ -0,0 +1,83 @@
import sys
sys.path.append('..')
import os
import argparse
import torch
from torch import nn
import numpy as np
from torch.optim import Adam
import utils
import metrics
import config_mixtures as cfg
import utils_mixture as mix_utils
from main_bayesian import train_model as train_bayesian, validate_model as validate_bayesian
from main_frequentist import train_model as train_frequentist, validate_model as validate_frequentist
# CUDA settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def train_splitted(num_tasks, bayesian=True, net_type='lenet'):
assert 10 % num_tasks == 0
# Hyper Parameter settings
train_ens = cfg.train_ens
valid_ens = cfg.valid_ens
n_epochs = cfg.n_epochs
lr_start = cfg.lr_start
if bayesian:
ckpt_dir = f"checkpoints/MNIST/bayesian/splitted/{num_tasks}-tasks/"
else:
ckpt_dir = f"checkpoints/MNIST/frequentist/splitted/{num_tasks}-tasks/"
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir, exist_ok=True)
loaders, datasets = mix_utils.get_splitmnist_dataloaders(num_tasks, return_datasets=True)
models = mix_utils.get_splitmnist_models(num_tasks, bayesian=bayesian, pretrained=False, net_type=net_type)
for task in range(1, num_tasks + 1):
print(f"Training task-{task}..")
trainset, testset, _, _ = datasets[task-1]
train_loader, valid_loader, _ = loaders[task-1]
net = models[task-1]
net = net.to(device)
ckpt_name = ckpt_dir + f"model_{net_type}_{num_tasks}.{task}.pt"
criterion = (metrics.ELBO(len(trainset)) if bayesian else nn.CrossEntropyLoss()).to(device)
optimizer = Adam(net.parameters(), lr=lr_start)
valid_loss_max = np.Inf
for epoch in range(n_epochs): # loop over the dataset multiple times
utils.adjust_learning_rate(optimizer, metrics.lr_linear(epoch, 0, n_epochs, lr_start))
if bayesian:
train_loss, train_acc, train_kl = train_bayesian(net, optimizer, criterion, train_loader, num_ens=train_ens)
valid_loss, valid_acc = validate_bayesian(net, criterion, valid_loader, num_ens=valid_ens)
print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f} \ttrain_kl_div: {:.4f}'.format(
epoch, train_loss, train_acc, valid_loss, valid_acc, train_kl))
else:
train_loss, train_acc = train_frequentist(net, optimizer, criterion, train_loader)
valid_loss, valid_acc = validate_frequentist(net, criterion, valid_loader)
print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f}'.format(
epoch, train_loss, train_acc, valid_loss, valid_acc))
# save model if validation accuracy has increased
if valid_loss <= valid_loss_max:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
valid_loss_max, valid_loss))
torch.save(net.state_dict(), ckpt_name)
valid_loss_max = valid_loss
print(f"Done training task-{task}")
if __name__ == '__main__':
parser = argparse.ArgumentParser(description = "PyTorch Bayesian Split Model Training")
parser.add_argument('--num_tasks', default=2, type=int, help='number of tasks')
parser.add_argument('--net_type', default='lenet', type=str, help='model')
parser.add_argument('--bayesian', default=1, type=int, help='is_bayesian_model(0/1)')
args = parser.parse_args()
train_splitted(args.num_tasks, bool(args.bayesian), args.net_type)

259
Mixtures/utils_mixture.py Executable file
View File

@ -0,0 +1,259 @@
import sys
sys.path.append('..')
import os
import torch
import numpy as np
import torch.nn as nn
from torch.nn import functional as F
import data
import utils
import metrics
from main_bayesian import getModel as getBayesianModel
from main_frequentist import getModel as getFrequentistModel
import config_mixtures as cfg
import uncertainty_estimation as ue
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class Pass(nn.Module):
def __init__(self):
super(Pass, self).__init__()
def forward(self, x):
return x
def _get_splitmnist_datasets(num_tasks):
datasets = []
for i in range(1, num_tasks + 1):
name = 'SplitMNIST-{}.{}'.format(num_tasks, i)
datasets.append(data.getDataset(name))
return datasets
def get_splitmnist_dataloaders(num_tasks, return_datasets=False):
loaders = []
datasets = _get_splitmnist_datasets(num_tasks)
for i in range(1, num_tasks + 1):
trainset, testset, _, _ = datasets[i-1]
curr_loaders = data.getDataloader(
trainset, testset, cfg.valid_size, cfg.batch_size, cfg.num_workers)
loaders.append(curr_loaders) # (train_loader, valid_loader, test_loader)
if return_datasets:
return loaders, datasets
return loaders
def get_splitmnist_models(num_tasks, bayesian=True, pretrained=False, weights_dir=None, net_type='lenet'):
inputs = 1
outputs = 10 // num_tasks
models = []
if pretrained:
assert weights_dir
for i in range(1, num_tasks + 1):
if bayesian:
model = getBayesianModel(net_type, inputs, outputs)
else:
model = getFrequentistModel(net_type, inputs, outputs)
models.append(model)
if pretrained:
weight_path = weights_dir + f"model_{net_type}_{num_tasks}.{i}.pt"
models[-1].load_state_dict(torch.load(weight_path))
return models
def get_mixture_model(num_tasks, weights_dir, net_type='lenet', include_last_layer=True):
"""
Current implementation is based on average value of weights
"""
net = getBayesianModel(net_type, 1, 5)
if not include_last_layer:
net.fc3 = Pass()
task_weights = []
for i in range(1, num_tasks + 1):
weight_path = weights_dir + f"model_{net_type}_{num_tasks}.{i}.pt"
task_weights.append(torch.load(weight_path))
mixture_weights = net.state_dict().copy()
layer_list = list(mixture_weights.keys())
for key in mixture_weights:
if key in layer_list:
concat_weights = torch.cat([w[key].unsqueeze(0) for w in task_weights] , dim=0)
average_weight = torch.mean(concat_weights, dim=0)
mixture_weights[key] = average_weight
net.load_state_dict(mixture_weights)
return net
def predict_regular(net, validloader, bayesian=True, num_ens=10):
"""
For both Bayesian and Frequentist models
"""
net.eval()
accs = []
for i, (inputs, labels) in enumerate(validloader):
inputs, labels = inputs.to(device), labels.to(device)
if bayesian:
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
for j in range(num_ens):
net_out, _ = net(inputs)
outputs[:, :, j] = F.log_softmax(net_out, dim=1).data
log_outputs = utils.logmeanexp(outputs, dim=2)
accs.append(metrics.acc(log_outputs, labels))
else:
output = net(inputs)
accs.append(metrics.acc(output.detach(), labels))
return np.mean(accs)
def predict_using_uncertainty_separate_models(net1, net2, valid_loader, uncertainty_type='epistemic_softmax', T=25):
"""
For Bayesian models
"""
accs = []
total_u1 = 0.0
total_u2 = 0.0
set1_selected = 0
set2_selected = 0
epi_or_ale, soft_or_norm = uncertainty_type.split('_')
soft_or_norm = True if soft_or_norm=='normalized' else False
for i, (inputs, labels) in enumerate(valid_loader):
inputs, labels = inputs.to(device), labels.to(device)
pred1, epi1, ale1 = ue.get_uncertainty_per_batch(net1, inputs, T=T, normalized=soft_or_norm)
pred2, epi2, ale2 = ue.get_uncertainty_per_batch(net2, inputs, T=T, normalized=soft_or_norm)
if epi_or_ale=='epistemic':
u1 = np.sum(epi1, axis=1)
u2 = np.sum(epi2, axis=1)
elif epi_or_ale=='aleatoric':
u1 = np.sum(ale1, axis=1)
u2 = np.sum(ale2, axis=1)
elif epi_or_ale=='both':
u1 = np.sum(epi1, axis=1) + np.sum(ale1, axis=1)
u2 = np.sum(epi2, axis=1) + np.sum(ale2, axis=1)
else:
raise ValueError("Not correct uncertainty type")
total_u1 += np.sum(u1).item()
total_u2 += np.sum(u2).item()
set1_preferred = u2 > u1 # idx where set1 has less uncertainty
set1_preferred = np.expand_dims(set1_preferred, 1)
preds = np.where(set1_preferred, pred1, pred2)
set1_selected += np.sum(set1_preferred)
set2_selected += np.sum(~set1_preferred)
accs.append(metrics.acc(torch.tensor(preds), labels))
return np.mean(accs), set1_selected/(set1_selected + set2_selected), \
set2_selected/(set1_selected + set2_selected), total_u1, total_u2
def predict_using_confidence_separate_models(net1, net2, valid_loader):
"""
For Frequentist models
"""
accs = []
set1_selected = 0
set2_selected = 0
for i, (inputs, labels) in enumerate(valid_loader):
inputs, labels = inputs.to(device), labels.to(device)
pred1 = F.softmax(net1(inputs), dim=1)
pred2 = F.softmax(net2(inputs), dim=1)
set1_preferred = pred1.max(dim=1)[0] > pred2.max(dim=1)[0] # idx where set1 has more confidence
preds = torch.where(set1_preferred.unsqueeze(1), pred1, pred2)
set1_selected += torch.sum(set1_preferred).float().item()
set2_selected += torch.sum(~set1_preferred).float().item()
accs.append(metrics.acc(preds.detach(), labels))
return np.mean(accs), set1_selected/(set1_selected + set2_selected), \
set2_selected/(set1_selected + set2_selected)
def wip_predict_using_epistemic_uncertainty_with_mixture_model(model, fc3_1, fc3_2, valid_loader, T=10):
accs = []
total_epistemic_1 = 0.0
total_epistemic_2 = 0.0
set_1_selected = 0
set_2_selected = 0
for i, (inputs, labels) in enumerate(valid_loader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = []
for i in range(inputs.shape[0]): # loop over batch
input_image = inputs[i].unsqueeze(0)
p_hat_1 = []
p_hat_2 = []
preds_1 = []
preds_2 = []
for t in range(T):
net_out_mix, _ = model(input_image)
# set_1
net_out_1 = fc3_1(net_out_mix)
preds_1.append(net_out_1)
prediction = F.softplus(net_out_1)
prediction = prediction / torch.sum(prediction, dim=1)
p_hat_1.append(prediction.cpu().detach())
# set_2
net_out_2 = fc3_2(net_out_mix)
preds_2.append(net_out_2)
prediction = F.softplus(net_out_2)
prediction = prediction / torch.sum(prediction, dim=1)
p_hat_2.append(prediction.cpu().detach())
# set_1
p_hat = torch.cat(p_hat_1, dim=0).numpy()
p_bar = np.mean(p_hat, axis=0)
preds = torch.cat(preds_1, dim=0)
pred_set_1 = torch.sum(preds, dim=0).unsqueeze(0)
temp = p_hat - np.expand_dims(p_bar, 0)
epistemic = np.dot(temp.T, temp) / T
epistemic_set_1 = np.sum(np.diag(epistemic)).item()
total_epistemic_1 += epistemic_set_1
# set_2
p_hat = torch.cat(p_hat_2, dim=0).numpy()
p_bar = np.mean(p_hat, axis=0)
preds = torch.cat(preds_2, dim=0)
pred_set_2 = torch.sum(preds, dim=0).unsqueeze(0)
temp = p_hat - np.expand_dims(p_bar, 0)
epistemic = np.dot(temp.T, temp) / T
epistemic_set_2 = np.sum(np.diag(epistemic)).item()
total_epistemic_2 += epistemic_set_2
if epistemic_set_1 > epistemic_set_2:
set_2_selected += 1
outputs.append(pred_set_2)
else:
set_1_selected += 1
outputs.append(pred_set_1)
outputs = torch.cat(outputs, dim=0)
accs.append(metrics.acc(outputs.detach(), labels))
return np.mean(accs), set_1_selected/(set_1_selected + set_2_selected), \
set_2_selected/(set_1_selected + set_2_selected), total_epistemic_1, total_epistemic_2

3
README.md Executable file
View File

@ -0,0 +1,3 @@
# Energy efficiency comparison
This experiment compares a Frequentist CNN model against a Bayesian CNN model

19
config_bayesian.py Executable file
View File

@ -0,0 +1,19 @@
############### Configuration file for Bayesian ###############
layer_type = 'lrt' # 'bbb' or 'lrt'
activation_type = 'softplus' # 'softplus' or 'relu'
priors={
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1), # (mean, std) normal_
'posterior_rho_initial': (-5, 0.1), # (mean, std) normal_
}
n_epochs = 200
lr_start = 0.001
num_workers = 4
valid_size = 0.2
batch_size = 256
train_ens = 1
valid_ens = 1
beta_type = 0.1 # 'Blundell', 'Standard', etc. Use float for const value
wide = 1

7
config_frequentist.py Executable file
View File

@ -0,0 +1,7 @@
############### Configuration file for Frequentist ###############
n_epochs = 200
lr = 0.001
num_workers = 4
valid_size = 0.2
batch_size = 256
wide = 1

4
cpu_watt.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
powerstat -z 0.5 1000000 > $1

64
gpu_sample_draw.py Executable file
View File

@ -0,0 +1,64 @@
import os
import re
import pickle
import numpy as np
from warnings import warn
c = 1.0
pickle_name = "freq_wattdata_"+str(c)+".pkl"
def get_sample_of_gpu():
from re import sub, findall
import subprocess
from subprocess import run
no_graph = "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running."
no_version = "Failed to initialize NVML: Driver/library version mismatch"
smi_string = run(['nvidia-smi'], stdout=subprocess.PIPE)
smi_string = smi_string.stdout.decode('utf-8')
smi_string = smi_string.split("\n")
if smi_string[0] == no_graph:
raise Exception("It seems that no NVIDIA GPU is installed")
elif smi_string[0] == no_version:
raise Exception("nvidia-smi version mismatch")
else:
return findall("[0-9]*MiB | [0-9]*W",smi_string[9])
#for l in smi_string:
#temp = findall("[0-9]*MiB | [0-9]*W",l)
#if temp:
#return temp
def total_watt_consumed():
with open(pickle_name, 'rb') as f:
x = pickle.load(f)
x = np.array(x)
x = x[:,0]
y = [int(re.findall("\d+",xi)[0]) for xi in x]
return sum(y)
if __name__ == '__main__':
dataDump = []
#var = True
#pickling_on = open("wattdata.pickle","wb")
while True:
#from run_service import retcode
try:
dataDump.append(get_sample_of_gpu())
with open(pickle_name, 'wb') as f:
pickle.dump(dataDump, f)
except EOFError:
warn('Pickle ran out of space')
c += 0.01
finally:
f.close()
#if retcode == 0:
#break
#pickle.dump(dataDump, pickling_on)
#pickling_on.close()

83
layers/BBB/BBBConv.py Executable file
View File

@ -0,0 +1,83 @@
import sys
sys.path.append("..")
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from metrics import calculate_kl as KL_DIV
from ..misc import ModuleWrapper
class BBBConv2d(ModuleWrapper):
def __init__(self, in_channels, out_channels, kernel_size,
stride=1, padding=0, dilation=1, bias=True, priors=None):
super(BBBConv2d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = 1
self.use_bias = bias
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if priors is None:
priors = {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1),
'posterior_rho_initial': (-3, 0.1),
}
self.prior_mu = priors['prior_mu']
self.prior_sigma = priors['prior_sigma']
self.posterior_mu_initial = priors['posterior_mu_initial']
self.posterior_rho_initial = priors['posterior_rho_initial']
self.W_mu = Parameter(torch.empty((out_channels, in_channels, *self.kernel_size), device=self.device))
self.W_rho = Parameter(torch.empty((out_channels, in_channels, *self.kernel_size), device=self.device))
if self.use_bias:
self.bias_mu = Parameter(torch.empty((out_channels), device=self.device))
self.bias_rho = Parameter(torch.empty((out_channels), device=self.device))
else:
self.register_parameter('bias_mu', None)
self.register_parameter('bias_rho', None)
self.reset_parameters()
def reset_parameters(self):
self.W_mu.data.normal_(*self.posterior_mu_initial)
self.W_rho.data.normal_(*self.posterior_rho_initial)
if self.use_bias:
self.bias_mu.data.normal_(*self.posterior_mu_initial)
self.bias_rho.data.normal_(*self.posterior_rho_initial)
def forward(self, input, sample=True):
if self.training or sample:
W_eps = torch.empty(self.W_mu.size()).normal_(0, 1).to(self.device)
self.W_sigma = torch.log1p(torch.exp(self.W_rho))
weight = self.W_mu + W_eps * self.W_sigma
if self.use_bias:
bias_eps = torch.empty(self.bias_mu.size()).normal_(0, 1).to(self.device)
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
bias = self.bias_mu + bias_eps * self.bias_sigma
else:
bias = None
else:
weight = self.W_mu
bias = self.bias_mu if self.use_bias else None
return F.conv2d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
def kl_loss(self):
kl = KL_DIV(self.prior_mu, self.prior_sigma, self.W_mu, self.W_sigma)
if self.use_bias:
kl += KL_DIV(self.prior_mu, self.prior_sigma, self.bias_mu, self.bias_sigma)
return kl

76
layers/BBB/BBBLinear.py Executable file
View File

@ -0,0 +1,76 @@
import sys
sys.path.append("..")
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from metrics import calculate_kl as KL_DIV
from ..misc import ModuleWrapper
class BBBLinear(ModuleWrapper):
def __init__(self, in_features, out_features, bias=True, priors=None):
super(BBBLinear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.use_bias = bias
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if priors is None:
priors = {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1),
'posterior_rho_initial': (-3, 0.1),
}
self.prior_mu = priors['prior_mu']
self.prior_sigma = priors['prior_sigma']
self.posterior_mu_initial = priors['posterior_mu_initial']
self.posterior_rho_initial = priors['posterior_rho_initial']
self.W_mu = Parameter(torch.empty((out_features, in_features), device=self.device))
self.W_rho = Parameter(torch.empty((out_features, in_features), device=self.device))
if self.use_bias:
self.bias_mu = Parameter(torch.empty((out_features), device=self.device))
self.bias_rho = Parameter(torch.empty((out_features), device=self.device))
else:
self.register_parameter('bias_mu', None)
self.register_parameter('bias_rho', None)
self.reset_parameters()
def reset_parameters(self):
self.W_mu.data.normal_(*self.posterior_mu_initial)
self.W_rho.data.normal_(*self.posterior_rho_initial)
if self.use_bias:
self.bias_mu.data.normal_(*self.posterior_mu_initial)
self.bias_rho.data.normal_(*self.posterior_rho_initial)
def forward(self, input, sample=True):
if self.training or sample:
W_eps = torch.empty(self.W_mu.size()).normal_(0, 1).to(self.device)
self.W_sigma = torch.log1p(torch.exp(self.W_rho))
weight = self.W_mu + W_eps * self.W_sigma
if self.use_bias:
bias_eps = torch.empty(self.bias_mu.size()).normal_(0, 1).to(self.device)
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
bias = self.bias_mu + bias_eps * self.bias_sigma
else:
bias = None
else:
weight = self.W_mu
bias = self.bias_mu if self.use_bias else None
return F.linear(input, weight, bias)
def kl_loss(self):
kl = KL_DIV(self.prior_mu, self.prior_sigma, self.W_mu, self.W_sigma)
if self.use_bias:
kl += KL_DIV(self.prior_mu, self.prior_sigma, self.bias_mu, self.bias_sigma)
return kl

87
layers/BBB_LRT/BBBConv.py Executable file
View File

@ -0,0 +1,87 @@
import sys
sys.path.append("..")
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import utils
from metrics import calculate_kl as KL_DIV
import config_bayesian as cfg
from ..misc import ModuleWrapper
class BBBConv2d(ModuleWrapper):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
padding=0, dilation=1, bias=True, priors=None):
super(BBBConv2d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = (kernel_size, kernel_size)
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = 1
self.use_bias = bias
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if priors is None:
priors = {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1),
'posterior_rho_initial': (-3, 0.1),
}
self.prior_mu = priors['prior_mu']
self.prior_sigma = priors['prior_sigma']
self.posterior_mu_initial = priors['posterior_mu_initial']
self.posterior_rho_initial = priors['posterior_rho_initial']
self.W_mu = Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size))
self.W_rho = Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size))
if self.use_bias:
self.bias_mu = Parameter(torch.Tensor(out_channels))
self.bias_rho = Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias_mu', None)
self.register_parameter('bias_rho', None)
self.reset_parameters()
def reset_parameters(self):
self.W_mu.data.normal_(*self.posterior_mu_initial)
self.W_rho.data.normal_(*self.posterior_rho_initial)
if self.use_bias:
self.bias_mu.data.normal_(*self.posterior_mu_initial)
self.bias_rho.data.normal_(*self.posterior_rho_initial)
def forward(self, x, sample=True):
self.W_sigma = torch.log1p(torch.exp(self.W_rho))
if self.use_bias:
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
bias_var = self.bias_sigma ** 2
else:
self.bias_sigma = bias_var = None
act_mu = F.conv2d(
x, self.W_mu, self.bias_mu, self.stride, self.padding, self.dilation, self.groups)
act_var = 1e-16 + F.conv2d(
x ** 2, self.W_sigma ** 2, bias_var, self.stride, self.padding, self.dilation, self.groups)
act_std = torch.sqrt(act_var)
if self.training or sample:
eps = torch.empty(act_mu.size()).normal_(0, 1).to(self.device)
return act_mu + act_std * eps
else:
return act_mu
def kl_loss(self):
kl = KL_DIV(self.prior_mu, self.prior_sigma, self.W_mu, self.W_sigma)
if self.use_bias:
kl += KL_DIV(self.prior_mu, self.prior_sigma, self.bias_mu, self.bias_sigma)
return kl

79
layers/BBB_LRT/BBBLinear.py Executable file
View File

@ -0,0 +1,79 @@
import sys
sys.path.append("..")
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import utils
from metrics import calculate_kl as KL_DIV
import config_bayesian as cfg
from ..misc import ModuleWrapper
class BBBLinear(ModuleWrapper):
def __init__(self, in_features, out_features, bias=True, priors=None):
super(BBBLinear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.use_bias = bias
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if priors is None:
priors = {
'prior_mu': 0,
'prior_sigma': 0.1,
'posterior_mu_initial': (0, 0.1),
'posterior_rho_initial': (-3, 0.1),
}
self.prior_mu = priors['prior_mu']
self.prior_sigma = priors['prior_sigma']
self.posterior_mu_initial = priors['posterior_mu_initial']
self.posterior_rho_initial = priors['posterior_rho_initial']
self.W_mu = Parameter(torch.Tensor(out_features, in_features))
self.W_rho = Parameter(torch.Tensor(out_features, in_features))
if self.use_bias:
self.bias_mu = Parameter(torch.Tensor(out_features))
self.bias_rho = Parameter(torch.Tensor(out_features))
else:
self.register_parameter('bias_mu', None)
self.register_parameter('bias_rho', None)
self.reset_parameters()
def reset_parameters(self):
self.W_mu.data.normal_(*self.posterior_mu_initial)
self.W_rho.data.normal_(*self.posterior_rho_initial)
if self.use_bias:
self.bias_mu.data.normal_(*self.posterior_mu_initial)
self.bias_rho.data.normal_(*self.posterior_rho_initial)
def forward(self, x, sample=True):
self.W_sigma = torch.log1p(torch.exp(self.W_rho))
if self.use_bias:
self.bias_sigma = torch.log1p(torch.exp(self.bias_rho))
bias_var = self.bias_sigma ** 2
else:
self.bias_sigma = bias_var = None
act_mu = F.linear(x, self.W_mu, self.bias_mu)
act_var = 1e-16 + F.linear(x ** 2, self.W_sigma ** 2, bias_var)
act_std = torch.sqrt(act_var)
if self.training or sample:
eps = torch.empty(act_mu.size()).normal_(0, 1).to(self.device)
return act_mu + act_std * eps
else:
return act_mu
def kl_loss(self):
kl = KL_DIV(self.prior_mu, self.prior_sigma, self.W_mu, self.W_sigma)
if self.use_bias:
kl += KL_DIV(self.prior_mu, self.prior_sigma, self.bias_mu, self.bias_sigma)
return kl

35
layers/misc.py Executable file
View File

@ -0,0 +1,35 @@
from torch import nn
class ModuleWrapper(nn.Module):
"""Wrapper for nn.Module with support for arbitrary flags and a universal forward pass"""
def __init__(self):
super(ModuleWrapper, self).__init__()
def set_flag(self, flag_name, value):
setattr(self, flag_name, value)
for m in self.children():
if hasattr(m, 'set_flag'):
m.set_flag(flag_name, value)
def forward(self, x):
for module in self.children():
x = module(x)
kl = 0.0
for module in self.modules():
if hasattr(module, 'kl_loss'):
kl = kl + module.kl_loss()
return x, kl
class FlattenLayer(ModuleWrapper):
def __init__(self, num_features):
super(FlattenLayer, self).__init__()
self.num_features = num_features
def forward(self, x):
return x.view(-1, self.num_features)

167
main_bayesian.py Executable file
View File

@ -0,0 +1,167 @@
from __future__ import print_function
import os
import data
import utils
import torch
import pickle
import metrics
import argparse
import numpy as np
import config_bayesian as cfg
from datetime import datetime
from torch.nn import functional as F
from torch.optim import Adam, lr_scheduler
import gpu_sample_draw
from models.BayesianModels.BayesianLeNet import BBBLeNet
from models.BayesianModels.BayesianAlexNet import BBBAlexNet
from models.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC
# CUDA settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def getModel(net_type, inputs, outputs, priors, layer_type, activation_type):
if (net_type == 'lenet'):
return BBBLeNet(outputs, inputs, priors, layer_type, activation_type,wide=cfg.wide)
elif (net_type == 'alexnet'):
return BBBAlexNet(outputs, inputs, priors, layer_type, activation_type)
elif (net_type == '3conv3fc'):
return BBB3Conv3FC(outputs, inputs, priors, layer_type, activation_type)
else:
raise ValueError('Network should be either [LeNet / AlexNet / 3Conv3FC')
def train_model(net, optimizer, criterion, trainloader, num_ens=1, beta_type=0.1, epoch=None, num_epochs=None):
net.train()
training_loss = 0.0
accs = []
kl_list = []
for i, (inputs, labels) in enumerate(trainloader, 1):
optimizer.zero_grad()
inputs, labels = inputs.to(device), labels.to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
kl = 0.0
for j in range(num_ens):
net_out, _kl = net(inputs)
kl += _kl
outputs[:, :, j] = F.log_softmax(net_out, dim=1)
kl = kl / num_ens
kl_list.append(kl.item())
log_outputs = utils.logmeanexp(outputs, dim=2)
beta = metrics.get_beta(i-1, len(trainloader), beta_type, epoch, num_epochs)
loss = criterion(log_outputs, labels, kl, beta)
loss.backward()
optimizer.step()
accs.append(metrics.acc(log_outputs.data, labels))
training_loss += loss.cpu().data.numpy()
return training_loss/len(trainloader), np.mean(accs), np.mean(kl_list)
def validate_model(net, criterion, validloader, num_ens=1, beta_type=0.1, epoch=None, num_epochs=None):
"""Calculate ensemble accuracy and NLL Loss"""
net.train()
valid_loss = 0.0
accs = []
for i, (inputs, labels) in enumerate(validloader):
inputs, labels = inputs.to(device), labels.to(device)
outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
kl = 0.0
for j in range(num_ens):
net_out, _kl = net(inputs)
kl += _kl
outputs[:, :, j] = F.log_softmax(net_out, dim=1).data
log_outputs = utils.logmeanexp(outputs, dim=2)
beta = metrics.get_beta(i-1, len(validloader), beta_type, epoch, num_epochs)
valid_loss += criterion(log_outputs, labels, kl, beta).item()
accs.append(metrics.acc(log_outputs, labels))
return valid_loss/len(validloader), np.mean(accs)
def run(dataset, net_type):
# Hyper Parameter settings
layer_type = cfg.layer_type
activation_type = cfg.activation_type
priors = cfg.priors
train_ens = cfg.train_ens
valid_ens = cfg.valid_ens
n_epochs = cfg.n_epochs
lr_start = cfg.lr_start
num_workers = cfg.num_workers
valid_size = cfg.valid_size
batch_size = cfg.batch_size
beta_type = cfg.beta_type
trainset, testset, inputs, outputs = data.getDataset(dataset)
train_loader, valid_loader, test_loader = data.getDataloader(
trainset, testset, valid_size, batch_size, num_workers)
net = getModel(net_type, inputs, outputs, priors, layer_type, activation_type).to(device)
ckpt_dir = f'checkpoints/{dataset}/bayesian'
ckpt_name = f'checkpoints/{dataset}/bayesian/model_{net_type}_{layer_type}_{activation_type}_{cfg.wide}.pt'
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir, exist_ok=True)
criterion = metrics.ELBO(len(trainset)).to(device)
optimizer = Adam(net.parameters(), lr=lr_start)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, verbose=True)
#valid_loss_max = np.Inf
#early_stop = []
#thrs=1e-9
train_data = []
for epoch in range(n_epochs): # loop over the dataset multiple times
train_loss, train_acc, train_kl = train_model(net, optimizer, criterion, train_loader, num_ens=train_ens, beta_type=beta_type, epoch=epoch, num_epochs=n_epochs)
valid_loss, valid_acc = validate_model(net, criterion, valid_loader, num_ens=valid_ens, beta_type=beta_type, epoch=epoch, num_epochs=n_epochs)
lr_sched.step(valid_loss)
train_data.append([epoch,train_loss,train_acc,valid_loss,valid_acc])
print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f} \ttrain_kl_div: {:.4f}'.format(
epoch, train_loss, train_acc, valid_loss, valid_acc, train_kl))
#early_stop.append(valid_acc)
#if epoch % 4 == 0 and epoch > 0:
#print("Value 1: {} >= {}, Value 2: {} >= {}, Value 2: {} >= {}".format(early_stop[0],valid_acc-thrs,early_stop[1],valid_acc-thrs,early_stop[2],valid_acc-thrs))
#if abs(early_stop[0]) >= valid_acc-thrs and abs(early_stop[1]) >= valid_acc-thrs and abs(early_stop[2]) >= valid_acc-thrs:
#break
#early_stop = []
if train_acc >= 0.50:
break
#if gpu_sample_draw.total_watt_consumed() > 100000:
#break
# save model on last epoch
#if epoch == (n_epochs-1):
#torch.save(net.state_dict(), ckpt_name)
with open("bayes_exp_data_"+str(cfg.wide)+".pkl", 'wb') as f:
pickle.dump(train_data, f)
if __name__ == '__main__':
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Initial Time =", current_time)
parser = argparse.ArgumentParser(description = "PyTorch Bayesian Model Training")
parser.add_argument('--net_type', default='lenet', type=str, help='model')
parser.add_argument('--dataset', default='CIFAR10', type=str, help='dataset = [MNIST/CIFAR10/CIFAR100]')
args = parser.parse_args()
run(args.dataset, args.net_type)
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Final Time =", current_time)

137
main_frequentist.py Executable file
View File

@ -0,0 +1,137 @@
from __future__ import print_function
import os
import data
import torch
#import utils
import pickle
import metrics
import argparse
import numpy as np
import torch.nn as nn
import gpu_sample_draw
from datetime import datetime
import config_frequentist as cfg
from torch.optim import Adam, lr_scheduler
from models.NonBayesianModels.LeNet import LeNet
from models.NonBayesianModels.AlexNet import AlexNet
from models.NonBayesianModels.ThreeConvThreeFC import ThreeConvThreeFC
# CUDA settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def getModel(net_type, inputs, outputs,wide=cfg.wide):
if (net_type == 'lenet'):
return LeNet(outputs, inputs,wide)
elif (net_type == 'alexnet'):
return AlexNet(outputs, inputs)
elif (net_type == '3conv3fc'):
return ThreeConvThreeFC(outputs, inputs)
else:
raise ValueError('Network should be either [LeNet / AlexNet / 3Conv3FC')
def train_model(net, optimizer, criterion, train_loader):
train_loss = 0.0
net.train()
accs = []
for data, target in train_loader:
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = net(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
accs.append(metrics.acc(output.detach(), target))
return train_loss, np.mean(accs)
def validate_model(net, criterion, valid_loader):
valid_loss = 0.0
net.eval()
accs = []
for data, target in valid_loader:
data, target = data.to(device), target.to(device)
output = net(data)
loss = criterion(output, target)
valid_loss += loss.item()*data.size(0)
accs.append(metrics.acc(output.detach(), target))
return valid_loss, np.mean(accs)
def run(dataset, net_type):
# Hyper Parameter settings
n_epochs = cfg.n_epochs
lr = cfg.lr
num_workers = cfg.num_workers
valid_size = cfg.valid_size
batch_size = cfg.batch_size
trainset, testset, inputs, outputs = data.getDataset(dataset)
train_loader, valid_loader, test_loader = data.getDataloader(
trainset, testset, valid_size, batch_size, num_workers)
net = getModel(net_type, inputs, outputs).to(device)
ckpt_dir = f'checkpoints/{dataset}/frequentist'
ckpt_name = f'checkpoints/{dataset}/frequentist/model_{net_type}_{cfg.wide}.pt'
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir, exist_ok=True)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(net.parameters(), lr=lr)
lr_sched = lr_scheduler.ReduceLROnPlateau(optimizer, patience=6, verbose=True)
#valid_loss_min = np.Inf
#early_stop = []
#thrs=1e-9
train_data = []
for epoch in range(1, n_epochs+1):
train_loss, train_acc = train_model(net, optimizer, criterion, train_loader)
valid_loss, valid_acc = validate_model(net, criterion, valid_loader)
lr_sched.step(valid_loss)
train_loss = train_loss/len(train_loader.dataset)
valid_loss = valid_loss/len(valid_loader.dataset)
train_data.append([epoch,train_loss,train_acc,valid_loss,valid_acc])
print('Epoch: {} \tTraining Loss: {:.4f} \tTraining Accuracy: {:.4f} \tValidation Loss: {:.4f} \tValidation Accuracy: {:.4f}'.format(
epoch, train_loss, train_acc, valid_loss, valid_acc))
#early_stop.append(valid_acc)
#if epoch % 4 == 0 and epoch > 0:
#print("Value 1: {} >= {}, Value 2: {} >= {}, Value 2: {} >= {}".format(early_stop[0],valid_acc-thrs,early_stop[1],valid_acc-thrs,early_stop[2],valid_acc-thrs))
#if abs(early_stop[0]) >= valid_acc-thrs and abs(early_stop[1]) >= valid_acc-thrs and abs(early_stop[2]) >= valid_acc-thrs:
#break
#early_stop = []
if train_acc >= 0.99:
break
#if gpu_sample_draw.total_watt_consumed() > 100000:
#break
# save model when finished
#if epoch == n_epochs:
#torch.save(net.state_dict(), ckpt_name)
with open("freq_exp_data_"+str(cfg.wide)+".pkl", 'wb') as f:
pickle.dump(train_data, f)
if __name__ == '__main__':
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Initial Time =", current_time)
parser = argparse.ArgumentParser(description = "PyTorch Frequentist Model Training")
parser.add_argument('--net_type', default='lenet', type=str, help='model')
parser.add_argument('--dataset', default='MNIST', type=str, help='dataset = [MNIST/CIFAR10/CIFAR100]')
args = parser.parse_args()
run(args.dataset, args.net_type)
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("Final Time =", current_time)

8
mem_free.sh Executable file
View File

@ -0,0 +1,8 @@
#!/bin/bash
while true
do
free -h >> $1
done

46
metrics.py Executable file
View File

@ -0,0 +1,46 @@
import numpy as np
import torch.nn.functional as F
from torch import nn
import torch
class ELBO(nn.Module):
def __init__(self, train_size):
super(ELBO, self).__init__()
self.train_size = train_size
def forward(self, input, target, kl, beta):
assert not target.requires_grad
return F.nll_loss(input, target, reduction='mean') * self.train_size + beta * kl
# def lr_linear(epoch_num, decay_start, total_epochs, start_value):
# if epoch_num < decay_start:
# return start_value
# return start_value*float(total_epochs-epoch_num)/float(total_epochs-decay_start)
def acc(outputs, targets):
return np.mean(outputs.cpu().numpy().argmax(axis=1) == targets.data.cpu().numpy())
def calculate_kl(mu_q, sig_q, mu_p, sig_p):
kl = 0.5 * (2 * torch.log(sig_p / sig_q) - 1 + (sig_q / sig_p).pow(2) + ((mu_p - mu_q) / sig_p).pow(2)).sum()
return kl
def get_beta(batch_idx, m, beta_type, epoch, num_epochs):
if type(beta_type) is float:
return beta_type
if beta_type == "Blundell":
beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
elif beta_type == "Soenderby":
if epoch is None or num_epochs is None:
raise ValueError('Soenderby method requires both epoch and num_epochs to be passed.')
beta = min(epoch / (num_epochs // 4), 1)
elif beta_type == "Standard":
beta = 1 / m
else:
beta = 0
return beta

View File

@ -0,0 +1,55 @@
import math
import torch.nn as nn
from layers import BBB_Linear, BBB_Conv2d
from layers import BBB_LRT_Linear, BBB_LRT_Conv2d
from layers import FlattenLayer, ModuleWrapper
class BBB3Conv3FC(ModuleWrapper):
"""
Simple Neural Network having 3 Convolution
and 3 FC layers with Bayesian layers.
"""
def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus'):
super(BBB3Conv3FC, self).__init__()
self.num_classes = outputs
self.layer_type = layer_type
self.priors = priors
if layer_type=='lrt':
BBBLinear = BBB_LRT_Linear
BBBConv2d = BBB_LRT_Conv2d
elif layer_type=='bbb':
BBBLinear = BBB_Linear
BBBConv2d = BBB_Conv2d
else:
raise ValueError("Undefined layer_type")
if activation_type=='softplus':
self.act = nn.Softplus
elif activation_type=='relu':
self.act = nn.ReLU
else:
raise ValueError("Only softplus or relu supported")
self.conv1 = BBBConv2d(inputs, 32, 5, padding=2, bias=True, priors=self.priors)
self.act1 = self.act()
self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
self.conv2 = BBBConv2d(32, 64, 5, padding=2, bias=True, priors=self.priors)
self.act2 = self.act()
self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
self.conv3 = BBBConv2d(64, 128, 5, padding=1, bias=True, priors=self.priors)
self.act3 = self.act()
self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
self.flatten = FlattenLayer(2 * 2 * 128)
self.fc1 = BBBLinear(2 * 2 * 128, 1000, bias=True, priors=self.priors)
self.act4 = self.act()
self.fc2 = BBBLinear(1000, 1000, bias=True, priors=self.priors)
self.act5 = self.act()
self.fc3 = BBBLinear(1000, outputs, bias=True, priors=self.priors)

View File

@ -0,0 +1,53 @@
import torch.nn as nn
import math
from layers import BBB_Linear, BBB_Conv2d
from layers import BBB_LRT_Linear, BBB_LRT_Conv2d
from layers import FlattenLayer, ModuleWrapper
class BBBAlexNet(ModuleWrapper):
'''The architecture of AlexNet with Bayesian Layers'''
def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus'):
super(BBBAlexNet, self).__init__()
self.num_classes = outputs
self.layer_type = layer_type
self.priors = priors
if layer_type=='lrt':
BBBLinear = BBB_LRT_Linear
BBBConv2d = BBB_LRT_Conv2d
elif layer_type=='bbb':
BBBLinear = BBB_Linear
BBBConv2d = BBB_Conv2d
else:
raise ValueError("Undefined layer_type")
if activation_type=='softplus':
self.act = nn.Softplus
elif activation_type=='relu':
self.act = nn.ReLU
else:
raise ValueError("Only softplus or relu supported")
self.conv1 = BBBConv2d(inputs, 64, 11, stride=4, padding=5, bias=True, priors=self.priors)
self.act1 = self.act()
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = BBBConv2d(64, 192, 5, padding=2, bias=True, priors=self.priors)
self.act2 = self.act()
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv3 = BBBConv2d(192, 384, 3, padding=1, bias=True, priors=self.priors)
self.act3 = self.act()
self.conv4 = BBBConv2d(384, 256, 3, padding=1, bias=True, priors=self.priors)
self.act4 = self.act()
self.conv5 = BBBConv2d(256, 128, 3, padding=1, bias=True, priors=self.priors)
self.act5 = self.act()
self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.flatten = FlattenLayer(1 * 1 * 128)
self.classifier = BBBLinear(1 * 1 * 128, outputs, bias=True, priors=self.priors)

View File

@ -0,0 +1,49 @@
import math
import torch.nn as nn
from layers import BBB_Linear, BBB_Conv2d
from layers import BBB_LRT_Linear, BBB_LRT_Conv2d
from layers import FlattenLayer, ModuleWrapper
class BBBLeNet(ModuleWrapper):
'''The architecture of LeNet with Bayesian Layers'''
def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus',wide=2):
super(BBBLeNet, self).__init__()
self.num_classes = outputs
self.layer_type = layer_type
self.priors = priors
if layer_type=='lrt':
BBBLinear = BBB_LRT_Linear
BBBConv2d = BBB_LRT_Conv2d
elif layer_type=='bbb':
BBBLinear = BBB_Linear
BBBConv2d = BBB_Conv2d
else:
raise ValueError("Undefined layer_type")
if activation_type=='softplus':
self.act = nn.Softplus
elif activation_type=='relu':
self.act = nn.ReLU
else:
raise ValueError("Only softplus or relu supported")
self.conv1 = BBBConv2d(inputs, 6*wide, 5, padding=0, bias=True, priors=self.priors)
self.act1 = self.act()
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.conv2 = BBBConv2d(6*wide, 16*wide, 5, padding=0, bias=True, priors=self.priors)
self.act2 = self.act()
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.flatten = FlattenLayer(5 * 5 * 16*wide)
self.fc1 = BBBLinear(5 * 5 * 16*wide, 120*wide, bias=True, priors=self.priors)
self.act3 = self.act()
self.fc2 = BBBLinear(120*wide, 84, bias=True, priors=self.priors)
self.act4 = self.act()
self.fc3 = BBBLinear(84, outputs, bias=True, priors=self.priors)

View File

@ -0,0 +1,40 @@
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
#nn.init.xavier_uniform(m.weight, gain=np.sqrt(2))
nn.init.normal_(m.weight, mean=0, std=1)
nn.init.constant(m.bias, 0)
class AlexNet(nn.Module):
def __init__(self, num_classes, inputs=3):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(inputs, 64, kernel_size=11, stride=4, padding=5),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(64, 192, kernel_size=5, padding=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Conv2d(192, 384, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Conv2d(384, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.MaxPool2d(kernel_size=2, stride=2),
)
self.classifier = nn.Linear(256, num_classes)
def forward(self, x):
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x

View File

@ -0,0 +1,40 @@
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from math import ceil
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
#nn.init.xavier_uniform(m.weight, gain=np.sqrt(2))
nn.init.normal_(m.weight, mean=0, std=1)
nn.init.constant(m.bias, 0)
class LeNet(nn.Module):
def __init__(self, num_classes, inputs=3, wide=2):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(inputs, ceil(6*wide), 5)
self.conv2 = nn.Conv2d(ceil(6*wide), ceil(16*wide), 5)
self.fc1 = nn.Linear(ceil(16*5*5*wide), ceil(120*wide))
self.fc2 = nn.Linear(ceil(120*wide), 84)
self.fc3 = nn.Linear(84, num_classes)
def forward(self, x):
out = F.relu(self.conv1(x))
#print(out.size())
out = F.max_pool2d(out, 2)
#print(out.size())
out = F.relu(self.conv2(out))
#print(out.size())
out = F.max_pool2d(out, 2)
#print(out.size())
out = out.view(out.size(0), -1)
#print(out.size())
out = F.relu(self.fc1(out))
#print(out.size())
out = F.relu(self.fc2(out))
#print(out.size())
out = self.fc3(out)
#print(out.size())
#print("END")
return(out)

View File

@ -0,0 +1,42 @@
import torch.nn as nn
from layers.misc import FlattenLayer
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
#nn.init.xavier_uniform(m.weight, gain=np.sqrt(2))
nn.init.normal_(m.weight, mean=0, std=1)
nn.init.constant(m.bias, 0)
class ThreeConvThreeFC(nn.Module):
"""
To train on CIFAR-10:
https://arxiv.org/pdf/1207.0580.pdf
"""
def __init__(self, outputs, inputs):
super(ThreeConvThreeFC, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(inputs, 32, 5, stride=1, padding=2),
nn.Softplus(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(32, 64, 5, stride=1, padding=2),
nn.Softplus(),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.Conv2d(64, 128, 5, stride=1, padding=1),
nn.Softplus(),
nn.MaxPool2d(kernel_size=3, stride=2),
)
self.classifier = nn.Sequential(
FlattenLayer(2 * 2 * 128),
nn.Linear(2 * 2 * 128, 1000),
nn.Softplus(),
nn.Linear(1000, 1000),
nn.Softplus(),
nn.Linear(1000, outputs)
)
def forward(self, x):
x = self.features(x)
x = self.classifier(x)
return x

36
run_service.py Executable file
View File

@ -0,0 +1,36 @@
import subprocess as sub
cmd = ["python", "main_frequentist.py"]
#cmd = ["python", "main_bayesian.py"]
cmd2 = ["./cpu_watt.sh","freq_1_cpu_watts"]
cmd3 = ["./mem_free.sh", "freq_1_ram_use"]
##from time import sleep
path = sub.check_output(['pwd'])
path = path.decode()
path = path.replace('\n', '')
startWattCounter = 'python ' + path + '/gpu_sample_draw.py'
#test = startNODE.split()
#test.append(pythonEnd)
#test.append(pythonEnd2)
#startNODE = test
##print(startNODE)
##print(startWattCounter)
p1 = sub.Popen(cmd)
#p2 = sub.Popen(startWattCounter.split())
p3 = sub.Popen(cmd2)
p4 = sub.Popen(cmd3)
retcode = p1.wait()
print(retcode)
p1.kill()
#p2.kill()
p3.kill()
p4.kill()

98
tests/test_models.py Executable file
View File

@ -0,0 +1,98 @@
import os
import sys
import math
import pytest
import unittest
import numpy as np
import torch
import torch.nn as nn
import utils
from models.BayesianModels.Bayesian3Conv3FC import BBB3Conv3FC
from models.BayesianModels.BayesianAlexNet import BBBAlexNet
from models.BayesianModels.BayesianLeNet import BBBLeNet
from models.NonBayesianModels.AlexNet import AlexNet
from models.NonBayesianModels.LeNet import LeNet
from models.NonBayesianModels.ThreeConvThreeFC import ThreeConvThreeFC
from layers.BBBConv import BBBConv2d
from layers.BBBLinear import BBBLinear
from layers.misc import FlattenLayer
cuda_available = torch.cuda.is_available()
bayesian_models = [BBBLeNet, BBBAlexNet, BBB3Conv3FC]
non_bayesian_models = [LeNet, AlexNet, ThreeConvThreeFC]
class TestModelForwardpass:
@pytest.mark.parametrize("model", bayesian_models)
def test_cpu_bayesian(self, model):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 3, 32, 32))
net = model(10, 3)
out = net(batch)
assert out[0].shape[0]==batch_size
@pytest.mark.parametrize("model", non_bayesian_models)
def test_cpu_frequentist(self, model):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 3, 32, 32))
net = model(10, 3)
out = net(batch)
assert out.shape[0]==batch_size
@pytest.mark.skipif(not cuda_available, reason="CUDA not available")
@pytest.mark.parametrize("model", bayesian_models)
def test_gpu_bayesian(self, model):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 3, 32, 32))
net = model(10, 3)
if cuda_available:
net = net.cuda()
batch = batch.cuda()
out = net(batch)
assert out[0].shape[0]==batch_size
@pytest.mark.skipif(not cuda_available, reason="CUDA not available")
@pytest.mark.parametrize("model", non_bayesian_models)
def test_gpu_frequentist(self, model):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 3, 32, 32))
net = model(10, 3)
if cuda_available:
net = net.cuda()
batch = batch.cuda()
out = net(batch)
assert out.shape[0]==batch_size
class TestBayesianLayers:
def test_flatten(self):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 64, 4, 4))
layer = FlattenLayer(4 * 4 * 64)
batch = layer(batch)
assert batch.shape[0]==batch_size
assert batch.shape[1]==(4 * 4 *64)
def test_conv(self):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 16, 24, 24))
layer = BBBConv2d(16, 6, 4, alpha_shape=(1,1), padding=0, bias=False)
batch = layer(batch)
assert batch.shape[0]==batch_size
assert batch.shape[1]==6
def test_linear(self):
batch_size = np.random.randint(1, 256)
batch = torch.randn((batch_size, 128))
layer = BBBLinear(128, 64, alpha_shape=(1,1), bias=False)
batch = layer(batch)
assert batch.shape[0]==batch_size
assert batch.shape[1]==64

184
uncertainty_estimation.py Executable file
View File

@ -0,0 +1,184 @@
import argparse
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
import torchvision
from torch.nn import functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import data
from main_bayesian import getModel
import config_bayesian as cfg
# CUDA settings
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
mnist_set = None
notmnist_set = None
transform = transforms.Compose([
transforms.ToPILImage(),
transforms.Resize((32, 32)),
transforms.ToTensor(),
])
def init_dataset(notmnist_dir):
global mnist_set
global notmnist_set
mnist_set, _, _, _ = data.getDataset('MNIST')
notmnist_set = torchvision.datasets.ImageFolder(root=notmnist_dir)
def get_uncertainty_per_image(model, input_image, T=15, normalized=False):
input_image = input_image.unsqueeze(0)
input_images = input_image.repeat(T, 1, 1, 1)
net_out, _ = model(input_images)
pred = torch.mean(net_out, dim=0).cpu().detach().numpy()
if normalized:
prediction = F.softplus(net_out)
p_hat = prediction / torch.sum(prediction, dim=1).unsqueeze(1)
else:
p_hat = F.softmax(net_out, dim=1)
p_hat = p_hat.detach().cpu().numpy()
p_bar = np.mean(p_hat, axis=0)
temp = p_hat - np.expand_dims(p_bar, 0)
epistemic = np.dot(temp.T, temp) / T
epistemic = np.diag(epistemic)
aleatoric = np.diag(p_bar) - (np.dot(p_hat.T, p_hat) / T)
aleatoric = np.diag(aleatoric)
return pred, epistemic, aleatoric
def get_uncertainty_per_batch(model, batch, T=15, normalized=False):
batch_predictions = []
net_outs = []
batches = batch.unsqueeze(0).repeat(T, 1, 1, 1, 1)
preds = []
epistemics = []
aleatorics = []
for i in range(T): # for T batches
net_out, _ = model(batches[i].cuda())
net_outs.append(net_out)
if normalized:
prediction = F.softplus(net_out)
prediction = prediction / torch.sum(prediction, dim=1).unsqueeze(1)
else:
prediction = F.softmax(net_out, dim=1)
batch_predictions.append(prediction)
for sample in range(batch.shape[0]):
# for each sample in a batch
pred = torch.cat([a_batch[sample].unsqueeze(0) for a_batch in net_outs], dim=0)
pred = torch.mean(pred, dim=0)
preds.append(pred)
p_hat = torch.cat([a_batch[sample].unsqueeze(0) for a_batch in batch_predictions], dim=0).detach().cpu().numpy()
p_bar = np.mean(p_hat, axis=0)
temp = p_hat - np.expand_dims(p_bar, 0)
epistemic = np.dot(temp.T, temp) / T
epistemic = np.diag(epistemic)
epistemics.append(epistemic)
aleatoric = np.diag(p_bar) - (np.dot(p_hat.T, p_hat) / T)
aleatoric = np.diag(aleatoric)
aleatorics.append(aleatoric)
epistemic = np.vstack(epistemics) # (batch_size, categories)
aleatoric = np.vstack(aleatorics) # (batch_size, categories)
preds = torch.cat([i.unsqueeze(0) for i in preds]).cpu().detach().numpy() # (batch_size, categories)
return preds, epistemic, aleatoric
def get_sample(dataset, sample_type='mnist'):
idx = np.random.randint(len(dataset.targets))
if sample_type=='mnist':
sample = dataset.data[idx]
truth = dataset.targets[idx]
else:
path, truth = dataset.samples[idx]
sample = torch.from_numpy(np.array(Image.open(path)))
sample = sample.unsqueeze(0)
sample = transform(sample)
return sample.to(device), truth
def run(net_type, weight_path, notmnist_dir):
init_dataset(notmnist_dir)
layer_type = cfg.layer_type
activation_type = cfg.activation_type
net = getModel(net_type, 1, 10, priors=None, layer_type=layer_type, activation_type=activation_type)
net.load_state_dict(torch.load(weight_path))
net.train()
net.to(device)
fig = plt.figure()
fig.suptitle('Uncertainty Estimation', fontsize='x-large')
mnist_img = fig.add_subplot(321)
notmnist_img = fig.add_subplot(322)
epi_stats_norm = fig.add_subplot(323)
ale_stats_norm = fig.add_subplot(324)
epi_stats_soft = fig.add_subplot(325)
ale_stats_soft = fig.add_subplot(326)
sample_mnist, truth_mnist = get_sample(mnist_set)
pred_mnist, epi_mnist_norm, ale_mnist_norm = get_uncertainty_per_image(net, sample_mnist, T=25, normalized=True)
pred_mnist, epi_mnist_soft, ale_mnist_soft = get_uncertainty_per_image(net, sample_mnist, T=25, normalized=False)
mnist_img.imshow(sample_mnist.squeeze().cpu(), cmap='gray')
mnist_img.axis('off')
mnist_img.set_title('MNIST Truth: {} Prediction: {}'.format(int(truth_mnist), int(np.argmax(pred_mnist))))
sample_notmnist, truth_notmnist = get_sample(notmnist_set, sample_type='notmnist')
pred_notmnist, epi_notmnist_norm, ale_notmnist_norm = get_uncertainty_per_image(net, sample_notmnist, T=25, normalized=True)
pred_notmnist, epi_notmnist_soft, ale_notmnist_soft = get_uncertainty_per_image(net, sample_notmnist, T=25, normalized=False)
notmnist_img.imshow(sample_notmnist.squeeze().cpu(), cmap='gray')
notmnist_img.axis('off')
notmnist_img.set_title('notMNIST Truth: {}({}) Prediction: {}({})'.format(
int(truth_notmnist), chr(65 + truth_notmnist), int(np.argmax(pred_notmnist)), chr(65 + np.argmax(pred_notmnist))))
x = list(range(10))
data = pd.DataFrame({
'epistemic_norm': np.hstack([epi_mnist_norm, epi_notmnist_norm]),
'aleatoric_norm': np.hstack([ale_mnist_norm, ale_notmnist_norm]),
'epistemic_soft': np.hstack([epi_mnist_soft, epi_notmnist_soft]),
'aleatoric_soft': np.hstack([ale_mnist_soft, ale_notmnist_soft]),
'category': np.hstack([x, x]),
'dataset': np.hstack([['MNIST']*10, ['notMNIST']*10])
})
print(data)
sns.barplot(x='category', y='epistemic_norm', hue='dataset', data=data, ax=epi_stats_norm)
sns.barplot(x='category', y='aleatoric_norm', hue='dataset', data=data, ax=ale_stats_norm)
epi_stats_norm.set_title('Epistemic Uncertainty (Normalized)')
ale_stats_norm.set_title('Aleatoric Uncertainty (Normalized)')
sns.barplot(x='category', y='epistemic_soft', hue='dataset', data=data, ax=epi_stats_soft)
sns.barplot(x='category', y='aleatoric_soft', hue='dataset', data=data, ax=ale_stats_soft)
epi_stats_soft.set_title('Epistemic Uncertainty (Softmax)')
ale_stats_soft.set_title('Aleatoric Uncertainty (Softmax)')
plt.show()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description = "PyTorch Uncertainty Estimation b/w MNIST and notMNIST")
parser.add_argument('--net_type', default='lenet', type=str, help='model')
parser.add_argument('--weights_path', default='checkpoints/MNIST/bayesian/model_lenet.pt', type=str, help='weights for model')
parser.add_argument('--notmnist_dir', default='data/notMNIST_small/', type=str, help='weights for model')
args = parser.parse_args()
run(args.net_type, args.weights_path, args.notmnist_dir)

44
utils.py Executable file
View File

@ -0,0 +1,44 @@
import os
import torch
import numpy as np
from torch.nn import functional as F
import config_bayesian as cfg
# cifar10 classes
cifar10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
'dog', 'frog', 'horse', 'ship', 'truck']
def logmeanexp(x, dim=None, keepdim=False):
"""Stable computation of log(mean(exp(x))"""
if dim is None:
x, dim = x.view(-1), 0
x_max, _ = torch.max(x, dim, keepdim=True)
x = x_max + torch.log(torch.mean(torch.exp(x - x_max), dim, keepdim=True))
return x if keepdim else x.squeeze(dim)
# check if dimension is correct
# def dimension_check(x, dim=None, keepdim=False):
# if dim is None:
# x, dim = x.view(-1), 0
# return x if keepdim else x.squeeze(dim)
def adjust_learning_rate(optimizer, lr):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def save_array_to_file(numpy_array, filename):
file = open(filename, 'a')
shape = " ".join(map(str, numpy_array.shape))
np.savetxt(file, numpy_array.flatten(), newline=" ", fmt="%.3f")
file.write("\n")
file.close()