Initial commit

This commit is contained in:
EddieCueto 2021-01-26 17:45:49 +00:00
commit ec60171ccc
7 changed files with 582 additions and 0 deletions

3
.gitignore vendored Normal file

@@ -0,0 +1,3 @@
__pycache__/*
.vscode/*
probability/*

232
bayesian_resnet.py Normal file

@@ -0,0 +1,232 @@
# Copyright 2018 The TensorFlow Probability Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Trains a Bayesian neural network to classify CIFAR-10 images.
The architecture can be either ResNet [1] or VGG [2].
To run with default arguments:
```
bazel run tensorflow_probability/examples:cifar10_bnn
```
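
This standalone copy replaces the original command-line flags with the
module-level constants below, so it can also be run directly (assuming the
TFP `probability` checkout providing the model imports is on `PYTHONPATH`):
```
python bayesian_resnet.py
```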
#### References
[1]: He, Kaiming, Xiangyu Zhang, Shaoqing Ren, and Jian Sun.
"Deep residual learning for image recognition."
_Proceedings of the IEEE_, 2016.
https://arxiv.org/abs/1512.03385
[2]: Simonyan, Karen, and Andrew Zisserman.
"Very deep convolutional networks for large-scale image recognition."
arXiv preprint arXiv:1409.1556 (2014).
https://arxiv.org/pdf/1409.1556.pdf
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import warnings
# Dependency imports
import matplotlib
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow_probability as tfp
from probability.tensorflow_probability.examples.models.bayesian_resnet import bayesian_resnet
from probability.tensorflow_probability.examples.models.bayesian_vgg import bayesian_vgg
from data_acquisition import build_fake_data
tf.compat.v1.disable_eager_execution()
matplotlib.use("Agg")
warnings.simplefilter(action="ignore")
tfd = tfp.distributions
IMAGE_SHAPE = [32, 32, 3]
learning_rate = 0.0001
epochs = 700
batch_size = 128
data_dir = os.path.join(os.getenv("TEST_TMPDIR", "/tmp"), "bayesian_neural_network/data")
model_dir = os.path.join(os.getenv("TEST_TMPDIR", "/tmp"), "bayesian_neural_network/")
eval_freq = 400
num_monte_carlo = 50
architecture = "resnet"
kernel_posterior_scale_mean = -9.0
kernel_posterior_scale_constraint = 0.2
kl_annealing = 50
subtract_pixel_mean = True
fake_data = False
def build_input_pipeline(x_train, x_test, y_train, y_test,
batch_size, valid_size):
"""Build an Iterator switching between train and heldout data."""
x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255
x_test /= 255
y_train = y_train.flatten()
y_test = y_test.flatten()
if subtract_pixel_mean:
x_train_mean = np.mean(x_train, axis=0)
x_train -= x_train_mean
x_test -= x_train_mean
print("x_train shape:" + str(x_train.shape))
print(str(x_train.shape[0]) + " train samples")
print(str(x_test.shape[0]) + " test samples")
# Build an iterator over training batches.
training_dataset = tf.data.Dataset.from_tensor_slices(
(x_train, np.int32(y_train)))
training_batches = training_dataset.shuffle(
50000, reshuffle_each_iteration=True).repeat().batch(batch_size)
training_iterator = tf.compat.v1.data.make_one_shot_iterator(training_batches)
  # Build an iterator over the heldout set with batch_size=heldout_size,
# i.e., return the entire heldout set as a constant.
heldout_dataset = tf.data.Dataset.from_tensor_slices(
(x_test, np.int32(y_test)))
heldout_batches = heldout_dataset.repeat().batch(valid_size)
heldout_iterator = tf.compat.v1.data.make_one_shot_iterator(heldout_batches)
# Combine these into a feedable iterator that can switch between training
# and validation inputs.
handle = tf.compat.v1.placeholder(tf.string, shape=[])
feedable_iterator = tf.compat.v1.data.Iterator.from_string_handle(
handle, training_batches.output_types, training_batches.output_shapes)
images, labels = feedable_iterator.get_next()
return images, labels, handle, training_iterator, heldout_iterator
def main():
if tf.io.gfile.exists(model_dir):
tf.compat.v1.logging.warning(
"Warning: deleting old log directory at {}".format(model_dir))
tf.io.gfile.rmtree(model_dir)
tf.io.gfile.makedirs(model_dir)
  if fake_data:
    # build_fake_data is the helper in data_acquisition.py; 500 examples is
    # an arbitrary smoke-test size (the original call passed no arguments).
    (x_train, y_train), (x_test, y_test) = build_fake_data(500, IMAGE_SHAPE)
else:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
  (images, labels, handle, training_iterator, heldout_iterator) = build_input_pipeline(
      x_train, x_test, y_train, y_test, batch_size, 500)
if architecture == "resnet":
model_fn = bayesian_resnet
else:
model_fn = bayesian_vgg
model = model_fn(
IMAGE_SHAPE,
num_classes=10,
kernel_posterior_scale_mean=kernel_posterior_scale_mean,
kernel_posterior_scale_constraint=kernel_posterior_scale_constraint)
logits = model(images)
labels_distribution = tfd.Categorical(logits=logits)
# Perform KL annealing. The optimal number of annealing steps
# depends on the dataset and architecture.
t = tf.compat.v2.Variable(0.0)
kl_regularizer = t / (kl_annealing * len(x_train) / batch_size)
# Compute the -ELBO as the loss. The kl term is annealed from 0 to 1 over
# the epochs specified by the kl_annealing flag.
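  # Concretely (a sketch, with N = len(x_train)):
  #   loss = E_q[-log p(y | x, w)]
  #          + min(1, t / (kl_annealing * N / batch_size)) * KL(q(w) || p(w)) / N
  # Dividing the summed per-layer KL terms by N keeps both terms on a
  # per-example scale, and the min(...) factor ramps the KL weight from 0
  # to 1 over roughly kl_annealing epochs.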
log_likelihood = labels_distribution.log_prob(labels)
neg_log_likelihood = -tf.reduce_mean(input_tensor=log_likelihood)
kl = sum(model.losses) / len(x_train) * tf.minimum(1.0, kl_regularizer)
loss = neg_log_likelihood + kl
# Build metrics for evaluation. Predictions are formed from a single forward
# pass of the probabilistic layers. They are cheap but noisy
# predictions.
predictions = tf.argmax(input=logits, axis=1)
with tf.compat.v1.name_scope("train"):
train_accuracy, train_accuracy_update_op = tf.compat.v1.metrics.accuracy(
labels=labels, predictions=predictions)
opt = tf.compat.v1.train.AdamOptimizer(learning_rate)
train_op = opt.minimize(loss)
update_step_op = tf.compat.v1.assign(t, t + 1)
with tf.compat.v1.name_scope("valid"):
valid_accuracy, valid_accuracy_update_op = tf.compat.v1.metrics.accuracy(
labels=labels, predictions=predictions)
init_op = tf.group(tf.compat.v1.global_variables_initializer(),
tf.compat.v1.local_variables_initializer())
stream_vars_valid = [
v for v in tf.compat.v1.local_variables() if "valid/" in v.name
]
reset_valid_op = tf.compat.v1.variables_initializer(stream_vars_valid)
with tf.compat.v1.Session() as sess:
sess.run(init_op)
# Run the training loop
train_handle = sess.run(training_iterator.string_handle())
heldout_handle = sess.run(heldout_iterator.string_handle())
training_steps = int(
round(epochs * (len(x_train) / batch_size)))
for step in range(training_steps):
_ = sess.run([train_op,
train_accuracy_update_op,
update_step_op],
feed_dict={handle: train_handle})
      # Periodically log loss, accuracy, and the KL term.
if step % 100 == 0:
loss_value, accuracy_value, kl_value = sess.run(
[loss, train_accuracy, kl], feed_dict={handle: train_handle})
print(
"Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f} KL: {:.3f}".format(
step, loss_value, accuracy_value, kl_value))
if (step + 1) % eval_freq == 0:
# Compute log prob of heldout set by averaging draws from the model:
# p(heldout | train) = int_model p(heldout|model) p(model|train)
# ~= 1/n * sum_{i=1}^n p(heldout | model_i)
# where model_i is a draw from the posterior
# p(model|train).
        # Categorical(...).probs can be None when the distribution is built
        # from logits in recent TFP releases; probs_parameter() returns the
        # softmax probabilities in either case.
        probs_tensor = labels_distribution.probs_parameter()
        probs = np.asarray([sess.run(probs_tensor,
                                     feed_dict={handle: heldout_handle})
                            for _ in range(num_monte_carlo)])
mean_probs = np.mean(probs, axis=0)
_, label_vals = sess.run(
(images, labels), feed_dict={handle: heldout_handle})
heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]),
label_vals.flatten()]))
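        # mean_probs[i, label] is the Monte Carlo average of the predictive
        # probability assigned to example i's true class, so heldout_lp is
        # the mean held-out log-likelihood in nats.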
print(" ... Held-out nats: {:.3f}".format(heldout_lp))
# Calculate validation accuracy
for _ in range(20):
sess.run(
valid_accuracy_update_op, feed_dict={handle: heldout_handle})
valid_value = sess.run(
valid_accuracy, feed_dict={handle: heldout_handle})
print(
" ... Validation Accuracy: {:.3f}".format(valid_value))
sess.run(reset_valid_op)
if __name__ == "__main__":
  main()

49
data_acquisition.py Normal file

@@ -0,0 +1,49 @@
# Data loading and input-pipeline preparation for CIFAR-10.
import tensorflow as tf
from tensorflow import keras
import numpy as np
def cifar10():
cifar = keras.datasets.cifar10
train, test = cifar.load_data()
return train, test
@tf.autograph.experimental.do_not_convert
def data_preparation(train_tuple, test_tuple, train_batch=64, train_shuffle=10000,
                     test_batch=5000, test_shuffle=10000):
    """Build normalized, shuffled tf.data pipelines over the train/test tuples."""
    # Shuffle before batching so individual examples, not whole batches,
    # are permuted each pass.
    train_dataset = tf.data.Dataset.from_tensor_slices(train_tuple)
    train_dataset = train_dataset.shuffle(train_shuffle).batch(train_batch)
    # Scale pixel values from [0, 255] to [0, 1].
    train_dataset = train_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
    # Optional augmentations, left disabled:
    # train_dataset = train_dataset.map(lambda x, y: (tf.image.central_crop(x, 0.75), y))
    # train_dataset = train_dataset.map(lambda x, y: (tf.image.random_flip_left_right(x), y))
    train_count = len(train_tuple[0])
    test_dataset = tf.data.Dataset.from_tensor_slices(test_tuple)
    test_dataset = test_dataset.shuffle(test_shuffle).batch(test_batch)
    test_dataset = test_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
    test_count = len(test_tuple[0])
    return train_dataset, train_count, test_dataset, test_count
def build_fake_data(num_examples, image_shape):
    """Random images and labels shaped like CIFAR-10, for smoke tests."""
    x_train = np.random.rand(num_examples, *image_shape).astype(np.float32)
    # Class labels must lie in [0, 10) to mimic CIFAR-10 targets; a
    # permutation of arange(num_examples) would produce out-of-range labels.
    y_train = np.random.randint(10, size=num_examples).astype(np.int32)
    x_test = np.random.rand(num_examples, *image_shape).astype(np.float32)
    y_test = np.random.randint(10, size=num_examples).astype(np.int32)
    return (x_train, y_train), (x_test, y_test)
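# A quick sanity check (hypothetical sizes, shaped like CIFAR-10):
#   (x_tr, y_tr), _ = build_fake_data(100, [32, 32, 3])
#   assert x_tr.shape == (100, 32, 32, 3) and y_tr.max() < 10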
def get_data():
    train, test = cifar10()
    train_dataset, train_count, test_dataset, test_count = data_preparation(train, test)
    return train_dataset, train_count, test_dataset, test_count
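# Typical use: the returned datasets yield normalized image/label batches.
#   train_ds, n_train, test_ds, n_test = get_data()
#   for images, labels in train_ds.take(1):
#       print(images.shape)  # (64, 32, 32, 3) with the default train_batch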

22
hyper.py Normal file

@@ -0,0 +1,22 @@
from tensorflow import keras
# CIFAR-10 class labels.
label = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
NUM_CLASSES = len(label)
IMAGE_SHAPE = [32, 32, 3]
# Experiment parameters
EPOCHS = 50
BATCH_SIZE = 128
# Loss functions
mse_loss = keras.losses.MeanSquaredError()
scce_loss = keras.losses.SparseCategoricalCrossentropy()
# Streaming metrics, accumulated across steps and reset each epoch
mean_loss = keras.metrics.Mean(name='train_loss')
sccea_loss = keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = keras.metrics.Mean(name='test_loss')
test_accuracy = keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
# Optimizer
adadelta = keras.optimizers.Adadelta()
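# Note: keras.optimizers.Adadelta() defaults to learning_rate=0.001 in TF 2.x,
# which is very conservative for training from scratch; the original Adadelta
# formulation effectively uses a rate of 1.0, so this is a knob worth tuning.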

108
resnet.py Normal file

@@ -0,0 +1,108 @@
import tensorflow as tf
from hyper import NUM_CLASSES
from resnet_block import make_basic_block_layer, make_bottleneck_layer
class ResNetTypeI(tf.keras.Model):
def __init__(self, layer_params):
super(ResNetTypeI, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(filters=64,
kernel_size=(7, 7),
strides=2,
padding="same")
self.bn1 = tf.keras.layers.BatchNormalization()
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
strides=2,
padding="same")
self.layer1 = make_basic_block_layer(filter_num=64,
blocks=layer_params[0])
self.layer2 = make_basic_block_layer(filter_num=128,
blocks=layer_params[1],
stride=2)
self.layer3 = make_basic_block_layer(filter_num=256,
blocks=layer_params[2],
stride=2)
self.layer4 = make_basic_block_layer(filter_num=512,
blocks=layer_params[3],
stride=2)
self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
self.fc = tf.keras.layers.Dense(units=NUM_CLASSES, activation=tf.keras.activations.softmax)
def call(self, inputs, training=None, mask=None):
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.pool1(x)
x = self.layer1(x, training=training)
x = self.layer2(x, training=training)
x = self.layer3(x, training=training)
x = self.layer4(x, training=training)
x = self.avgpool(x)
output = self.fc(x)
return output
class ResNetTypeII(tf.keras.Model):
def __init__(self, layer_params):
super(ResNetTypeII, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(filters=64,
kernel_size=(7, 7),
strides=2,
padding="same")
self.bn1 = tf.keras.layers.BatchNormalization()
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
strides=2,
padding="same")
self.layer1 = make_bottleneck_layer(filter_num=64,
blocks=layer_params[0])
self.layer2 = make_bottleneck_layer(filter_num=128,
blocks=layer_params[1],
stride=2)
self.layer3 = make_bottleneck_layer(filter_num=256,
blocks=layer_params[2],
stride=2)
self.layer4 = make_bottleneck_layer(filter_num=512,
blocks=layer_params[3],
stride=2)
self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
self.fc = tf.keras.layers.Dense(units=NUM_CLASSES, activation=tf.keras.activations.softmax)
def call(self, inputs, training=None, mask=None):
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.pool1(x)
x = self.layer1(x, training=training)
x = self.layer2(x, training=training)
x = self.layer3(x, training=training)
x = self.layer4(x, training=training)
x = self.avgpool(x)
output = self.fc(x)
return output
def resnet_18():
return ResNetTypeI(layer_params=[2, 2, 2, 2])
def resnet_34():
return ResNetTypeI(layer_params=[3, 4, 6, 3])
def resnet_50():
return ResNetTypeII(layer_params=[3, 4, 6, 3])
def resnet_101():
return ResNetTypeII(layer_params=[3, 4, 23, 3])
def resnet_152():
return ResNetTypeII(layer_params=[3, 8, 36, 3])
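# Depth bookkeeping (conv1 and the final dense layer contribute 2 layers):
#   resnet_18: 2 + 2 * (2 + 2 + 2 + 2) = 18   (BasicBlock = 2 convs)
#   resnet_50: 2 + 3 * (3 + 4 + 6 + 3) = 50   (BottleNeck = 3 convs)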

100
resnet_block.py Normal file

@@ -0,0 +1,100 @@
import tensorflow as tf
class BasicBlock(tf.keras.layers.Layer):
def __init__(self, filter_num, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
kernel_size=(3, 3),
strides=stride,
padding="same")
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
kernel_size=(3, 3),
strides=1,
padding="same")
self.bn2 = tf.keras.layers.BatchNormalization()
if stride != 1:
self.downsample = tf.keras.Sequential()
self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num,
kernel_size=(1, 1),
strides=stride))
self.downsample.add(tf.keras.layers.BatchNormalization())
else:
self.downsample = lambda x: x
def call(self, inputs, training=None, **kwargs):
residual = self.downsample(inputs)
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.conv2(x)
x = self.bn2(x, training=training)
output = tf.nn.relu(tf.keras.layers.add([residual, x]))
return output
class BottleNeck(tf.keras.layers.Layer):
def __init__(self, filter_num, stride=1):
super(BottleNeck, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
kernel_size=(1, 1),
strides=1,
padding='same')
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
kernel_size=(3, 3),
strides=stride,
padding='same')
self.bn2 = tf.keras.layers.BatchNormalization()
self.conv3 = tf.keras.layers.Conv2D(filters=filter_num * 4,
kernel_size=(1, 1),
strides=1,
padding='same')
self.bn3 = tf.keras.layers.BatchNormalization()
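        # Unlike BasicBlock, the projection shortcut is always built here:
        # the residual must be widened to filter_num * 4 channels to match
        # the main path's output, even when stride == 1.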
self.downsample = tf.keras.Sequential()
self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num * 4,
kernel_size=(1, 1),
strides=stride))
self.downsample.add(tf.keras.layers.BatchNormalization())
def call(self, inputs, training=None, **kwargs):
residual = self.downsample(inputs)
x = self.conv1(inputs)
x = self.bn1(x, training=training)
x = tf.nn.relu(x)
x = self.conv2(x)
x = self.bn2(x, training=training)
x = tf.nn.relu(x)
x = self.conv3(x)
x = self.bn3(x, training=training)
output = tf.nn.relu(tf.keras.layers.add([residual, x]))
return output
def make_basic_block_layer(filter_num, blocks, stride=1):
res_block = tf.keras.Sequential()
res_block.add(BasicBlock(filter_num, stride=stride))
for _ in range(1, blocks):
res_block.add(BasicBlock(filter_num, stride=1))
return res_block
def make_bottleneck_layer(filter_num, blocks, stride=1):
res_block = tf.keras.Sequential()
res_block.add(BottleNeck(filter_num, stride=stride))
for _ in range(1, blocks):
res_block.add(BottleNeck(filter_num, stride=1))
return res_block
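# Shape sanity check (a sketch, assuming a 32x32 input with 64 channels):
#   layer = make_basic_block_layer(filter_num=64, blocks=2, stride=2)
#   y = layer(tf.zeros([1, 32, 32, 64]))  # -> TensorShape([1, 16, 16, 64])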

68
train_resnet.py Normal file

@@ -0,0 +1,68 @@
from __future__ import absolute_import, division, print_function
import math
import tensorflow as tf
from data_acquisition import get_data
from resnet import resnet_18
import hyper
if __name__ == '__main__':
    # GPU settings: memory growth must be configured before any op
    # initializes the devices, so this runs ahead of model construction.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    model = resnet_18()
    model.build(input_shape=(None, hyper.IMAGE_SHAPE[0], hyper.IMAGE_SHAPE[1], hyper.IMAGE_SHAPE[2]))
    model.summary()
    train_dataset, train_count, test_dataset, test_count = get_data()
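    # Custom training loop: train_step records the forward pass on a
    # GradientTape, differentiates the sparse cross-entropy loss with respect
    # to the trainable variables, and applies the Adadelta update; the
    # streaming metrics defined in hyper.py accumulate loss and accuracy.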
@tf.function
def train_step(images, labels):
with tf.GradientTape() as tape:
predictions = model(images, training=True)
            loss = hyper.scce_loss(y_true=labels, y_pred=predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        hyper.adadelta.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))
        hyper.mean_loss(loss)
        hyper.sccea_loss(labels, predictions)
@tf.function
def valid_step(images, labels):
predictions = model(images, training=False)
v_loss = hyper.scce_loss(labels, predictions)
hyper.test_loss(v_loss)
hyper.test_accuracy(labels, predictions)
    for epoch in range(hyper.EPOCHS):
        # Reset all streaming metrics; the validation metrics would otherwise accumulate across epochs.
        hyper.mean_loss.reset_states()
        hyper.sccea_loss.reset_states()
        hyper.test_loss.reset_states()
        hyper.test_accuracy.reset_states()
step = 0
for images, labels in train_dataset:
step += 1
train_step(images, labels)
print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}".format(epoch + 1,
hyper.EPOCHS,
step,
math.ceil(train_count / hyper.BATCH_SIZE),
hyper.mean_loss.result(),
hyper.sccea_loss.result()))
for valid_images, valid_labels in test_dataset:
valid_step(valid_images, valid_labels)
print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
"valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch + 1,
hyper.EPOCHS,
hyper.mean_loss.result(),
hyper.sccea_loss.result(),
hyper.test_loss.result(),
hyper.test_accuracy.result()))