Initial commit
commit ec60171ccc

@@ -0,0 +1,3 @@
__pycache__/*
.vscode/*
probability/*

@@ -0,0 +1,232 @@
# Copyright 2018 The TensorFlow Probability Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Trains a Bayesian neural network to classify CIFAR-10 images.

The architecture can be either ResNet [1] or VGG [2].

To run with default arguments:

```
bazel run tensorflow_probability/examples:cifar10_bnn
```

#### References

[1]: He, Kaiming, Xiangyu Zhang, Shaoqing Ren, and Jian Sun.
     "Deep residual learning for image recognition."
     _Proceedings of the IEEE_, 2016.
     https://arxiv.org/abs/1512.03385
[2]: Simonyan, Karen, and Andrew Zisserman.
     "Very deep convolutional networks for large-scale image recognition."
     arXiv preprint arXiv:1409.1556 (2014).
     https://arxiv.org/pdf/1409.1556.pdf
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import warnings

# Dependency imports
import matplotlib
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow_probability as tfp

from probability.tensorflow_probability.examples.models.bayesian_resnet import bayesian_resnet
from probability.tensorflow_probability.examples.models.bayesian_vgg import bayesian_vgg

tf.compat.v1.disable_eager_execution()
matplotlib.use("Agg")
warnings.simplefilter(action="ignore")
tfd = tfp.distributions

# Training configuration (module-level constants).
IMAGE_SHAPE = [32, 32, 3]
learning_rate = 0.0001
epochs = 700
batch_size = 128
data_dir = os.path.join(os.getenv("TEST_TMPDIR", "/tmp"),
                        "bayesian_neural_network/data")
model_dir = os.path.join(os.getenv("TEST_TMPDIR", "/tmp"),
                         "bayesian_neural_network/")
eval_freq = 400  # steps between held-out evaluations
num_monte_carlo = 50  # posterior draws per held-out evaluation
architecture = "resnet"  # "resnet" selects bayesian_resnet; anything else, bayesian_vgg
kernel_posterior_scale_mean = -9.0
kernel_posterior_scale_constraint = 0.2
kl_annealing = 50  # epochs over which the KL term is annealed from 0 to 1
subtract_pixel_mean = True
fake_data = False


def build_input_pipeline(x_train, x_test, y_train, y_test,
                         batch_size, valid_size):
  """Build an Iterator switching between train and heldout data."""

  x_train = x_train.astype("float32")
  x_test = x_test.astype("float32")

  x_train /= 255
  x_test /= 255

  y_train = y_train.flatten()
  y_test = y_test.flatten()

  if subtract_pixel_mean:
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean

  print("x_train shape: " + str(x_train.shape))
  print(str(x_train.shape[0]) + " train samples")
  print(str(x_test.shape[0]) + " test samples")

  # Build an iterator over training batches.
  training_dataset = tf.data.Dataset.from_tensor_slices(
      (x_train, np.int32(y_train)))
  training_batches = training_dataset.shuffle(
      50000, reshuffle_each_iteration=True).repeat().batch(batch_size)
  training_iterator = tf.compat.v1.data.make_one_shot_iterator(training_batches)

  # Build an iterator over the heldout set with batch_size=heldout_size,
  # i.e., return the entire heldout set as a constant.
  heldout_dataset = tf.data.Dataset.from_tensor_slices(
      (x_test, np.int32(y_test)))
  heldout_batches = heldout_dataset.repeat().batch(valid_size)
  heldout_iterator = tf.compat.v1.data.make_one_shot_iterator(heldout_batches)

  # Combine these into a feedable iterator that can switch between training
  # and validation inputs.
  handle = tf.compat.v1.placeholder(tf.string, shape=[])
  feedable_iterator = tf.compat.v1.data.Iterator.from_string_handle(
      handle, training_batches.output_types, training_batches.output_shapes)
  images, labels = feedable_iterator.get_next()

  return images, labels, handle, training_iterator, heldout_iterator


def main():
  # del argv  # unused
  if tf.io.gfile.exists(model_dir):
    tf.compat.v1.logging.warning(
        "Warning: deleting old log directory at {}".format(model_dir))
    tf.io.gfile.rmtree(model_dir)
  tf.io.gfile.makedirs(model_dir)

  # NOTE: build_fake_data is neither defined nor imported in this file, so
  # fake_data must remain False for this script to run.
  if fake_data:
    (x_train, y_train), (x_test, y_test) = build_fake_data()
  else:
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

  (images, labels, handle,
   training_iterator, heldout_iterator) = build_input_pipeline(
       x_train, x_test, y_train, y_test, batch_size, 500)

  if architecture == "resnet":
    model_fn = bayesian_resnet
  else:
    model_fn = bayesian_vgg

  model = model_fn(
      IMAGE_SHAPE,
      num_classes=10,
      kernel_posterior_scale_mean=kernel_posterior_scale_mean,
      kernel_posterior_scale_constraint=kernel_posterior_scale_constraint)
  logits = model(images)
  labels_distribution = tfd.Categorical(logits=logits)

  # Perform KL annealing. The optimal number of annealing steps
  # depends on the dataset and architecture.
  t = tf.compat.v2.Variable(0.0)
  kl_regularizer = t / (kl_annealing * len(x_train) / batch_size)

  # Compute the -ELBO as the loss. The kl term is annealed from 0 to 1 over
  # the epochs specified by the kl_annealing flag.
  log_likelihood = labels_distribution.log_prob(labels)
  neg_log_likelihood = -tf.reduce_mean(input_tensor=log_likelihood)
  kl = sum(model.losses) / len(x_train) * tf.minimum(1.0, kl_regularizer)
  loss = neg_log_likelihood + kl
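
  # In symbols (a sketch of what the three lines above compute, with
  # N = len(x_train) and t the number of completed training steps):
  #   loss ~= -mean log p(y | x, w)
  #           + min(1, t / (kl_annealing * N / batch_size)) * KL(q(w) || p(w)) / N
  # so the KL weight ramps linearly from 0 to 1 over `kl_annealing` epochs.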

  # Build metrics for evaluation. Predictions are formed from a single forward
  # pass of the probabilistic layers. They are cheap but noisy
  # predictions.
  predictions = tf.argmax(input=logits, axis=1)
  with tf.compat.v1.name_scope("train"):
    train_accuracy, train_accuracy_update_op = tf.compat.v1.metrics.accuracy(
        labels=labels, predictions=predictions)
    opt = tf.compat.v1.train.AdamOptimizer(learning_rate)
    train_op = opt.minimize(loss)
    update_step_op = tf.compat.v1.assign(t, t + 1)

  with tf.compat.v1.name_scope("valid"):
    valid_accuracy, valid_accuracy_update_op = tf.compat.v1.metrics.accuracy(
        labels=labels, predictions=predictions)

  init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                     tf.compat.v1.local_variables_initializer())

  stream_vars_valid = [
      v for v in tf.compat.v1.local_variables() if "valid/" in v.name
  ]
  reset_valid_op = tf.compat.v1.variables_initializer(stream_vars_valid)

  with tf.compat.v1.Session() as sess:
    sess.run(init_op)

    # Run the training loop.
    train_handle = sess.run(training_iterator.string_handle())
    heldout_handle = sess.run(heldout_iterator.string_handle())
    training_steps = int(
        round(epochs * (len(x_train) / batch_size)))
    for step in range(training_steps):
      _ = sess.run([train_op,
                    train_accuracy_update_op,
                    update_step_op],
                   feed_dict={handle: train_handle})

      # Manually print progress at a fixed frequency.
      if step % 100 == 0:
        loss_value, accuracy_value, kl_value = sess.run(
            [loss, train_accuracy, kl], feed_dict={handle: train_handle})
        print(
            "Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f} KL: {:.3f}".format(
                step, loss_value, accuracy_value, kl_value))

      if (step + 1) % eval_freq == 0:
        # Compute log prob of heldout set by averaging draws from the model:
        # p(heldout | train) = int_model p(heldout|model) p(model|train)
        #                   ~= 1/n * sum_{i=1}^n p(heldout | model_i)
        # where model_i is a draw from the posterior p(model|train).
        probs = np.asarray([sess.run((labels_distribution.probs),
                                     feed_dict={handle: heldout_handle})
                            for _ in range(num_monte_carlo)])
        mean_probs = np.mean(probs, axis=0)
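
        # The probabilities are averaged before taking the log below because the
        # posterior predictive is a mixture, p(y | x, D) ~= (1/n) sum_i p(y | x, w_i);
        # averaging per-draw log-probs instead would underestimate it.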

        _, label_vals = sess.run(
            (images, labels), feed_dict={handle: heldout_handle})
        heldout_lp = np.mean(np.log(mean_probs[np.arange(mean_probs.shape[0]),
                                               label_vals.flatten()]))
        print(" ... Held-out nats: {:.3f}".format(heldout_lp))

        # Calculate validation accuracy.
        for _ in range(20):
          sess.run(
              valid_accuracy_update_op, feed_dict={handle: heldout_handle})
        valid_value = sess.run(
            valid_accuracy, feed_dict={handle: heldout_handle})

        print(
            " ... Validation Accuracy: {:.3f}".format(valid_value))

        sess.run(reset_valid_op)


if __name__ == "__main__":
  # tf.compat.v1.app.run()
  main()

@@ -0,0 +1,49 @@
# Loads and prepares the CIFAR-10 dataset.
import tensorflow as tf
from tensorflow import keras
import numpy as np
# import matplotlib.pyplot as plt
# from tensorflow._api.v2 import data


def cifar10():
    """Return the raw CIFAR-10 (train, test) tuples of numpy arrays."""
    cifar = keras.datasets.cifar10
    train, test = cifar.load_data()

    return train, test


@tf.autograph.experimental.do_not_convert
def data_preparation(train_tuple, test_tuple, train_batch=64,
                     train_shuffle=10000, test_batch=5000, test_shuffle=10000):
    """Wrap the raw arrays in batched, shuffled, normalized tf.data pipelines."""
    # Note: batching happens before shuffling, so the shuffle reorders whole
    # batches rather than individual examples.
    train_dataset = tf.data.Dataset.from_tensor_slices(train_tuple).batch(train_batch).shuffle(train_shuffle)
    train_dataset = train_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
    # train_dataset = train_dataset.map(lambda x, y: (tf.image.central_crop(x, 0.75), y))
    # train_dataset = train_dataset.map(lambda x, y: (tf.image.random_flip_left_right(x), y))
    # train_dataset = train_dataset.repeat()
    train_dataset = tf.data.Dataset.zip(train_dataset)
    train_count = len(train_tuple[0])

    test_dataset = tf.data.Dataset.from_tensor_slices(test_tuple).batch(test_batch).shuffle(test_shuffle)
    test_dataset = test_dataset.map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y))
    # test_dataset = test_dataset.map(lambda x, y: (tf.image.central_crop(x, 0.75), y))
    # test_dataset = test_dataset.repeat()
    test_dataset = tf.data.Dataset.zip(test_dataset)
    test_count = len(test_tuple[0])

    return train_dataset, train_count, test_dataset, test_count


def build_fake_data(num_examples, IMAGE_SHAPE):
    """Generate random images and labels with the same shapes as CIFAR-10 data."""
    x_train = np.random.rand(num_examples, *IMAGE_SHAPE).astype(np.float32)
    y_train = np.random.permutation(np.arange(num_examples)).astype(np.int32)
    x_test = np.random.rand(num_examples, *IMAGE_SHAPE).astype(np.float32)
    y_test = np.random.permutation(np.arange(num_examples)).astype(np.int32)

    return (x_train, y_train), (x_test, y_test)


def get_data():
    train, test = cifar10()
    train_dataset, train_count, test_dataset, test_count = data_preparation(train, test)

    return train_dataset, train_count, test_dataset, test_count

@@ -0,0 +1,22 @@
from tensorflow import keras

# CIFAR-10 class labels
label = ['Airplane', 'Automobile', 'Bird', 'Cat', 'Deer', 'Dog', 'Frog', 'Horse', 'Ship', 'Truck']
NUM_CLASSES = len(label)
IMAGE_SHAPE = [32, 32, 3]

# Experiment parameters
EPOCHS = 50
BATCH_SIZE = 128

# Loss functions
mse_loss = keras.losses.MeanSquaredError()
scce_loss = keras.losses.SparseCategoricalCrossentropy()

# Streaming metrics
mean_loss = keras.metrics.Mean(name='train_loss')
sccea_loss = keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = keras.metrics.Mean(name='test_loss')
test_accuracy = keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Optimizer
adadelta = keras.optimizers.Adadelta()

@@ -0,0 +1,108 @@
import tensorflow as tf
from hyper import NUM_CLASSES
from resnet_block import make_basic_block_layer, make_bottleneck_layer


class ResNetTypeI(tf.keras.Model):
    """ResNet built from BasicBlock stacks (used for ResNet-18/34)."""

    def __init__(self, layer_params):
        super(ResNetTypeI, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(filters=64,
                                            kernel_size=(7, 7),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                               strides=2,
                                               padding="same")

        self.layer1 = make_basic_block_layer(filter_num=64,
                                             blocks=layer_params[0])
        self.layer2 = make_basic_block_layer(filter_num=128,
                                             blocks=layer_params[1],
                                             stride=2)
        self.layer3 = make_basic_block_layer(filter_num=256,
                                             blocks=layer_params[2],
                                             stride=2)
        self.layer4 = make_basic_block_layer(filter_num=512,
                                             blocks=layer_params[3],
                                             stride=2)

        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        output = self.fc(x)

        return output


class ResNetTypeII(tf.keras.Model):
    """ResNet built from BottleNeck stacks (used for ResNet-50/101/152)."""

    def __init__(self, layer_params):
        super(ResNetTypeII, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=64,
                                            kernel_size=(7, 7),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                               strides=2,
                                               padding="same")

        self.layer1 = make_bottleneck_layer(filter_num=64,
                                            blocks=layer_params[0])
        self.layer2 = make_bottleneck_layer(filter_num=128,
                                            blocks=layer_params[1],
                                            stride=2)
        self.layer3 = make_bottleneck_layer(filter_num=256,
                                            blocks=layer_params[2],
                                            stride=2)
        self.layer4 = make_bottleneck_layer(filter_num=512,
                                            blocks=layer_params[3],
                                            stride=2)

        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.pool1(x)
        x = self.layer1(x, training=training)
        x = self.layer2(x, training=training)
        x = self.layer3(x, training=training)
        x = self.layer4(x, training=training)
        x = self.avgpool(x)
        output = self.fc(x)

        return output


def resnet_18():
    return ResNetTypeI(layer_params=[2, 2, 2, 2])


def resnet_34():
    return ResNetTypeI(layer_params=[3, 4, 6, 3])


def resnet_50():
    return ResNetTypeII(layer_params=[3, 4, 6, 3])


def resnet_101():
    return ResNetTypeII(layer_params=[3, 4, 23, 3])


def resnet_152():
    return ResNetTypeII(layer_params=[3, 8, 36, 3])

@@ -0,0 +1,100 @@
import tensorflow as tf


class BasicBlock(tf.keras.layers.Layer):

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=stride,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        if stride != 1:
            self.downsample = tf.keras.Sequential()
            self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num,
                                                       kernel_size=(1, 1),
                                                       strides=stride))
            self.downsample.add(tf.keras.layers.BatchNormalization())
        else:
            self.downsample = lambda x: x

    def call(self, inputs, training=None, **kwargs):
        residual = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)

        output = tf.nn.relu(tf.keras.layers.add([residual, x]))

        return output
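

# Note on BasicBlock: its call() computes relu(F(x) + shortcut(x)), where the
# shortcut is the identity when stride == 1 and a 1x1 projection convolution
# plus batch norm when the spatial resolution changes, as in He et al. (2016).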


class BottleNeck(tf.keras.layers.Layer):

    def __init__(self, filter_num, stride=1):
        super(BottleNeck, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding='same')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filters=filter_num,
                                            kernel_size=(3, 3),
                                            strides=stride,
                                            padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(filters=filter_num * 4,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding='same')
        self.bn3 = tf.keras.layers.BatchNormalization()

        self.downsample = tf.keras.Sequential()
        self.downsample.add(tf.keras.layers.Conv2D(filters=filter_num * 4,
                                                   kernel_size=(1, 1),
                                                   strides=stride))
        self.downsample.add(tf.keras.layers.BatchNormalization())

    def call(self, inputs, training=None, **kwargs):
        residual = self.downsample(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = tf.nn.relu(x)
        x = self.conv3(x)
        x = self.bn3(x, training=training)

        output = tf.nn.relu(tf.keras.layers.add([residual, x]))

        return output


def make_basic_block_layer(filter_num, blocks, stride=1):
    res_block = tf.keras.Sequential()
    res_block.add(BasicBlock(filter_num, stride=stride))

    for _ in range(1, blocks):
        res_block.add(BasicBlock(filter_num, stride=1))

    return res_block


def make_bottleneck_layer(filter_num, blocks, stride=1):
    res_block = tf.keras.Sequential()
    res_block.add(BottleNeck(filter_num, stride=stride))

    for _ in range(1, blocks):
        res_block.add(BottleNeck(filter_num, stride=1))

    return res_block

@@ -0,0 +1,68 @@
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from data_acquisition import get_data
from resnet import resnet_18
import hyper
import math

if __name__ == '__main__':
    model = resnet_18()
    model.build(input_shape=(None, hyper.IMAGE_SHAPE[0],
                             hyper.IMAGE_SHAPE[1], hyper.IMAGE_SHAPE[2]))
    model.summary()

    train_dataset, train_count, test_dataset, test_count = get_data()

    # GPU settings
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = hyper.scce_loss(y_true=labels, y_pred=predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        hyper.adadelta.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))

        hyper.mean_loss(loss)
        hyper.sccea_loss(labels, predictions)

    @tf.function
    def valid_step(images, labels):
        predictions = model(images, training=False)
        v_loss = hyper.scce_loss(labels, predictions)

        hyper.test_loss(v_loss)
        hyper.test_accuracy(labels, predictions)
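
    # Both step functions are wrapped in @tf.function, so TensorFlow traces them
    # into graphs on the first call and reuses the compiled graphs afterwards;
    # the metric objects imported from `hyper` accumulate state across calls.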

    for epoch in range(hyper.EPOCHS):
        # Reset the streaming metrics at the start of every epoch.
        hyper.mean_loss.reset_states()
        hyper.sccea_loss.reset_states()
        hyper.test_loss.reset_states()
        hyper.test_accuracy.reset_states()
        step = 0
        for images, labels in train_dataset:
            step += 1
            train_step(images, labels)
            print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}".format(
                epoch + 1,
                hyper.EPOCHS,
                step,
                math.ceil(train_count / hyper.BATCH_SIZE),
                hyper.mean_loss.result(),
                hyper.sccea_loss.result()))
        for valid_images, valid_labels in test_dataset:
            valid_step(valid_images, valid_labels)

        print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
              "valid loss: {:.5f}, valid accuracy: {:.5f}".format(
                  epoch + 1,
                  hyper.EPOCHS,
                  hyper.mean_loss.result(),
                  hyper.sccea_loss.result(),
                  hyper.test_loss.result(),
                  hyper.test_accuracy.result()))