# Copyright (c) 2018/4/6 Hu Zhiming JimmyHu@pku.edu.cn All Rights Reserved. # The CNN estimator for 17_OxFlowers Dataset. ####################################### # Network Architecture # input layer # conv3_32 # maxpool # conv3_64 # maxpool # conv3_128 # maxpool # conv3_256 # maxpool # conv3_512 # maxpool # conv3_1024 # maxpool # FC-1024 # output layer ####################################### # import the libs(libs & future libs). from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import datetime # the params of the input dataset. # the width, height & channels of the images in the input TFRecords file. ImageWidth = 224 ImageHeight = 224 ImageChannels = 3 # the number of categories. CategoryNum = 17 # the params for training. # the batch size for training. Batch_Size = 85 # Steps is the number of training steps in a time. Steps = 200 # Loops is the total number of training times. # Total_Training_Steps = Steps* Loops Loops = 6 # Output thogging info. # Use tensorboard --logdir=PATH to view the graphs. 
tf.logging.set_verbosity(tf.logging.INFO)


def _conv_bn_pool(inputs, filters, training):
    """One conv3 -> batch-norm -> ReLU -> maxpool2 stage of the network.

    Args:
        inputs: a 4-D feature tensor [batch, height, width, channels].
        filters: the number of 3x3 convolution filters for this stage.
        training: True in TRAIN mode, so batch norm uses batch statistics
            and accumulates moving averages; False in EVAL/PREDICT.

    Returns:
        The pooled feature tensor with spatial size halved.
    """
    conv = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=3,
        padding="same")
    # BUG FIX: batch_normalization must be told whether we are training.
    # Without `training=...` it always defaults to inference mode and
    # normalizes with the (never-updated) moving statistics.
    bn = tf.layers.batch_normalization(inputs=conv, training=training)
    activated = tf.nn.relu(bn)
    return tf.layers.max_pooling2d(inputs=activated, pool_size=[2, 2], strides=2)


def cnn_model_fn(features, labels, mode):
    """The model function for the CNN OxFlower estimator.

    Args:
        features: dict with key 'x' holding the image batch tensor.
        labels: int64 class-id tensor (None in PREDICT mode).
        mode: a tf.estimator.ModeKeys value (TRAIN / EVAL / PREDICT).

    Returns:
        A tf.estimator.EstimatorSpec appropriate for `mode`.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Input Layer.
    # Reshape X to 4-D tensor: [batch_size, width, height, channels].
    # OxFlower images are 224*224 pixels, and have 3 color channels.
    input_layer = tf.reshape(
        features['x'], [-1, ImageWidth, ImageHeight, ImageChannels])

    # Six identical conv3/BN/ReLU/maxpool stages with doubling filter
    # counts (32 -> 1024), matching the architecture in the file header.
    net = input_layer
    for stage_filters in (32, 64, 128, 256, 512, 1024):
        net = _conv_bn_pool(net, stage_filters, is_training)

    # Flatten the final feature map into a batch of vectors.
    net_flat = tf.reshape(net, [-1, net.shape[1] * net.shape[2] * net.shape[3]])

    # Fully Connected Dense Layer #1.
    fc_1 = tf.layers.dense(inputs=net_flat, units=1024, activation=tf.nn.relu)

    # Dropout; `rate` is the DROP probability, so rate=0.8 keeps only 20%
    # of the units (the original comment claiming "0.6 kept" was wrong).
    # Dropout is only active in TRAIN mode.
    dropout_1 = tf.layers.dropout(inputs=fc_1, rate=0.8, training=is_training)

    # Logits layer.
    # Output Tensor Shape: [batch_size, CategoryNum].
    # Default: activation=None, maintaining a linear activation.
    logits = tf.layers.dense(inputs=dropout_1, units=CategoryNum)

    predictions = {
        # Generate predictions (for PREDICT and EVAL mode).
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by
        # the `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")}
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes).
    # sparse_* takes integer class ids directly; no one-hot labels needed.
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Calculate evaluation metrics.
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"], name='acc_op')
    eval_metric_ops = {'accuracy': accuracy}
    # Use tensorboard --logdir=PATH to view the graphs. The summary makes
    # accuracy available to TensorBoard in both TRAIN and EVAL modes.
    tf.summary.scalar('accuracy', accuracy[1])

    # Configure the Training Op (for TRAIN mode).
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        # BUG FIX: the batch-norm moving-average updates live in the
        # UPDATE_OPS collection and must run with every training step;
        # otherwise evaluation uses stale (initial) statistics.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode).
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def read_and_decode(filename):
    """Read and decode a TFRecords file via the queue-based input pipeline.

    pipeline: TFRecords --> queue --> serialized_example --> dict.

    Args:
        filename: path to the TFRecords file.

    Returns:
        A tuple (imgs, labels) of tensors; imgs are float32 in [-0.5, 0.5].
    """
    # Output strings (e.g. filenames) to a queue for an input pipeline.
    filename_queue = tf.train.string_input_producer([filename])
    # A Reader that outputs the records from a TFRecords file.
    # reader.read(queue) returns a tuple of Tensors (key, value), both
    # string scalar Tensors; only the serialized value is needed here.
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # Parses a single Example proto into a dict mapping feature keys to
    # Tensor and SparseTensor values.
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string), })
    # Reinterpret the bytes of a string as a vector of numbers.
    imgs = tf.decode_raw(features['img_raw'], tf.uint8)
    imgs = tf.reshape(imgs, [-1, ImageWidth, ImageHeight, ImageChannels])
    # Cast the data from (0, 255) to (-0.5, 0.5);
    # (-0.5, 0.5) may be better than (0, 1).
    imgs = tf.cast(imgs, tf.float32) * (1. / 255) - 0.5
    labels = tf.cast(features['label'], tf.int64)
    return imgs, labels


def parse_function(example_proto):
    """Parse a single TFRecord example into ({'x': image}, label).

    Args:
        example_proto: a serialized Example proto from a TFRecordDataset.

    Returns:
        A (features_dict, label) pair consumable by the Estimator.
    """
    # Parses a single Example proto into a dict mapping feature keys to
    # Tensor and SparseTensor values.
    features = tf.parse_single_example(
        example_proto,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string), })
    # Reinterpret the bytes of a string as a vector of numbers.
    imgs = tf.decode_raw(features['img_raw'], tf.uint8)
    # Reshape; ImageChannels (== 3) used for consistency with the rest of
    # the file instead of a hard-coded literal.
    imgs = tf.reshape(imgs, [ImageWidth, ImageHeight, ImageChannels])
    # Cast the data from (0, 255) to (-0.5, 0.5);
    # (-0.5, 0.5) may be better than (0, 1).
    imgs = tf.cast(imgs, tf.float32) * (1. / 255) - 0.5
    labels = tf.cast(features['label'], tf.int64)
    return {'x': imgs}, labels


def train_input_fn(tfrecords, batch_size):
    """An input function for training mode.

    Args:
        tfrecords: the filename of the training TFRecord file.
        batch_size: the batch size.

    Returns:
        A (features, labels) pair of tensors for one shuffled batch.
    """
    # Read the TFRecord file into a dataset and parse it.
    dataset = tf.data.TFRecordDataset(tfrecords)
    dataset = dataset.map(parse_function)
    # buffer_size should be greater than the number of examples in the
    # Dataset, ensuring that the data is completely shuffled.
    buffer_size = 10000
    # Shuffle, repeat, and batch the examples.
    dataset = dataset.shuffle(buffer_size).repeat().batch(batch_size)
    # Make a one-shot iterator to get the data of a batch.
    train_iterator = dataset.make_one_shot_iterator()
    features, labels = train_iterator.get_next()
    return features, labels


def eval_input_fn(tfrecords, batch_size):
    """An input function for evaluation mode.

    Args:
        tfrecords: the filename of the evaluation/test TFRecord file.
        batch_size: the batch size.

    Returns:
        A (features, labels) pair of tensors for one (unshuffled) batch.
    """
    # Read the TFRecord file into a dataset and parse it.
    dataset = tf.data.TFRecordDataset(tfrecords)
    dataset = dataset.map(parse_function)
    # Batch the examples; no shuffle/repeat for evaluation.
    dataset = dataset.batch(batch_size)
    # Make a one-shot iterator to get the data of a batch.
    eval_iterator = dataset.make_one_shot_iterator()
    features, labels = eval_iterator.get_next()
    return features, labels


def main(unused_argv):
    """Create the estimator and evaluate it on the test set."""
    # Create the Estimator.
    oxflower_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="Models/CNN_75/")
    # Uncomment this to retrain the model.
    # Train and validate the model in a loop.
    # start_time = datetime.datetime.now()
    # for i in range(Loops):
    #     # Train the model.
    #     oxflower_classifier.train(
    #         input_fn=lambda: train_input_fn('Dataset/train_aug.tfrecords', Batch_Size),
    #         steps=Steps)
    #     # Evaluate the model on the validation set.
    #     eval_results = oxflower_classifier.evaluate(
    #         input_fn=lambda: eval_input_fn('Dataset/validation.tfrecords', Batch_Size))
    #     accuracy = eval_results['accuracy'] * 100
    #     print('\n\ntraining steps: {}'.format((i + 1) * Steps))
    #     print('Validation set accuracy: {:0.2f}%\n\n'.format(accuracy))
    # end_time = datetime.datetime.now()
    # print('\n\n\ntraining starts at: {}'.format(start_time))
    # print('\ntraining ends at: {}\n\n\n'.format(end_time))

    # Evaluate the model on the test set.
    eval_results = oxflower_classifier.evaluate(
        input_fn=lambda: eval_input_fn('Dataset/test.tfrecords', Batch_Size))
    # Calculate the accuracy of our CNN model.
    accuracy = eval_results['accuracy'] * 100
    print('\n\nTest set accuracy: {:0.2f}%\n\n'.format(accuracy))


if __name__ == "__main__":
    # tf.app.run() runs the main function in this module by default.
    tf.app.run()