# ops.py ---
#
# Filename: ops.py
# Description:
# Author: Kwang Moo Yi
# Maintainer:
# Created: Tue Apr 3 14:09:17 2018 (-0700)
# Version:
# Package-Requires: ()
# URL:
# Doc URL:
# Keywords:
# Compatibility:
#
#
# Modified by: Goncalo Pais
# Date: 28 Jun 2019
# https://arxiv.org/abs/1904.01701
#
# Instituto Superior Técnico (IST)
# Change Log:
#
#
#
# Copyright (C)
# Visual Computing Group @ University of Victoria
# Computer Vision Lab @ EPFL
#
# Code:

import numpy as np
import tensorflow as tf
from six.moves import xrange

# ------------------------------------------------------------
# Tensorflow ops


def tf_get_shape_as_list(x):
    """Return the static shape of `x`, with unknown dims replaced by -1."""
    return [_s if _s is not None else -1 for _s in x.get_shape().as_list()]


def tf_quaternion_from_matrix(M):
    """Convert a batch of 3x3 rotation matrices to quaternions (w, x, y, z)."""
    m00 = M[:, 0, 0][..., None]
    m01 = M[:, 0, 1][..., None]
    m02 = M[:, 0, 2][..., None]
    m10 = M[:, 1, 0][..., None]
    m11 = M[:, 1, 1][..., None]
    m12 = M[:, 1, 2][..., None]
    m20 = M[:, 2, 0][..., None]
    m21 = M[:, 2, 1][..., None]
    m22 = M[:, 2, 2][..., None]

    # Symmetric matrix K
    zeros = tf.zeros_like(m00)
    K = tf.concat(
        [m00 - m11 - m22, zeros, zeros, zeros,
         m01 + m10, m11 - m00 - m22, zeros, zeros,
         m02 + m20, m12 + m21, m22 - m00 - m11, zeros,
         m21 - m12, m02 - m20, m10 - m01, m00 + m11 + m22],
        axis=1)
    K = tf.reshape(K, (-1, 4, 4))
    K /= 3.0

    # The quaternion is the eigenvector of K that corresponds to the
    # largest eigenvalue
    w, V = tf.self_adjoint_eig(K)
    q0 = V[:, 3, 3][..., None]
    q1 = V[:, 0, 3][..., None]
    q2 = V[:, 1, 3][..., None]
    q3 = V[:, 2, 3][..., None]
    q = tf.concat([q0, q1, q2, q3], axis=1)

    # Flip the sign so that the scalar part is non-negative
    sel = tf.reshape(tf.to_float(q[:, 0] < 0.0), (-1, 1))
    q = (1.0 - sel) * q - sel * q

    return q


def tf_matrix_from_quaternion(q, eps=1e-10):
    """Convert a batch of quaternions to flattened 3x3 rotation matrices."""
    # Make unit quaternion
    q_norm = q / (eps + tf.norm(q, axis=1, keep_dims=True))
    q_norm *= tf.constant(2.0 ** 0.5, dtype=tf.float32)
    qq = tf.matmul(
        tf.reshape(q_norm, (-1, 4, 1)),
        tf.reshape(q_norm, (-1, 1, 4))
    )
    M = tf.stack([
        1.0 - qq[:, 2, 2] - qq[:, 3, 3], qq[:, 1, 2] - qq[:, 3, 0],
        qq[:, 1, 3] + qq[:, 2, 0], qq[:, 1, 2] + qq[:, 3, 0],
        1.0 - qq[:, 1, 1] - qq[:, 3, 3], qq[:, 2, 3] - qq[:, 1, 0],
        qq[:, 1, 3] - qq[:, 2, 0], qq[:, 2, 3] + qq[:, 1, 0],
        1.0 - qq[:, 1, 1] - qq[:, 2, 2]
    ], axis=1)

    return M


def tf_skew_symmetric(v):
    """Build the flattened 3x3 skew-symmetric matrix [v]_x for each row of v."""
    zero = tf.zeros_like(v[:, 0])
    M = tf.stack([
        zero, -v[:, 2], v[:, 1],
        v[:, 2], zero, -v[:, 0],
        -v[:, 1], v[:, 0], zero,
    ], axis=1)

    return M


def tf_unskew_symmetric(M):
    """Recover the vector v from a flattened skew-symmetric matrix."""
    v = tf.stack([
        0.5 * (M[:, 7] - M[:, 5]),
        0.5 * (M[:, 2] - M[:, 6]),
        0.5 * (M[:, 3] - M[:, 1]),
    ], axis=1)

    return v


# ------------------------------------------------------------
# Architecture related


def bn_act(linout, perform_gcn, perform_bn, activation_fn, is_training,
           data_format):
    """Perform batch normalization and activation."""
    if data_format == "NHWC":
        axis = -1
    else:
        axis = 1

    # Global Context normalization on the input
    if perform_gcn:
        # Epsilon to be used in tf.nn.batch_normalization
        var_eps = 1e-3
        # Get mean and variance for a single sample (channel-wise; note
        # that we omit axis=1 since we expect a size of 1 in that
        # dimension)
        mean, variance = tf.nn.moments(linout, axes=[2], keep_dims=True)
        # Use tensorflow's nn.batch_normalization
        linout = tf.nn.batch_normalization(
            linout, mean, variance, None, None, var_eps)

    if perform_bn:
        with tf.variable_scope("bn", reuse=tf.AUTO_REUSE):
            linout = tf.layers.batch_normalization(
                inputs=linout,
                center=True,
                scale=True,
                training=is_training,
                trainable=False,
                axis=axis,
            )

    if activation_fn is None:
        output = linout
    else:
        output = activation_fn(linout)

    return output

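
# ------------------------------------------------------------
# Illustrative sketch (not part of the original pipeline): a quick
# round-trip sanity check for the quaternion/rotation-matrix ops above.
# The `_demo_quaternion_roundtrip` helper is hypothetical and assumes a
# TF1-style session.
def _demo_quaternion_roundtrip():
    # Batch of two identity rotations
    R = tf.eye(3, batch_shape=[2])
    q = tf_quaternion_from_matrix(R)   # (2, 4), expected ~[1, 0, 0, 0]
    M = tf_matrix_from_quaternion(q)   # (2, 9), flattened 3x3 matrices
    with tf.Session() as sess:
        q_np, M_np = sess.run([q, M])
    # M_np.reshape(-1, 3, 3) should recover the identity matrices
    return q_np, M_np.reshape(-1, 3, 3)
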
def pad_cyclic(tensor, paddings):
    """Cyclically (wrap-around) pad `tensor` along each dimension."""
    ndim = len(paddings)
    for _dim, _pad in zip(xrange(ndim), paddings):

        pad_list = []
        if _pad[0] > 0:
            # Padding to put at front
            slice_st = [slice(None, None)] * ndim
            slice_st[_dim] = slice(-_pad[0], None)
            pad_list += [tensor[tuple(slice_st)]]

        # Original
        pad_list += [tensor]

        if _pad[1] > 0:
            # Padding to put at back
            slice_ed = [slice(None, None)] * ndim
            slice_ed[_dim] = slice(None, _pad[1])
            pad_list += [tensor[tuple(slice_ed)]]

        # Concatenate to do padding
        if len(pad_list) > 1:
            tensor = tf.concat(pad_list, axis=_dim)

    return tensor


def conv1d_pad_cyclic(inputs, ksize, numconv, data_format="NCHW"):
    in_shp = tf_get_shape_as_list(inputs)
    ksize = 2 * (ksize // 2 * numconv) + 1

    if data_format == "NCHW":
        assert (ksize < in_shp[-1]) or (in_shp[-1] == -1)
        if np.mod(ksize, 2) == 0:
            paddings = [[0, 0], [0, 0], [0, 0], [ksize // 2 - 1, ksize // 2]]
        else:
            paddings = [[0, 0], [0, 0], [0, 0], [ksize // 2, ksize // 2]]
    else:
        assert (ksize < in_shp[-2]) or (in_shp[-2] == -1)
        if np.mod(ksize, 2) == 0:
            paddings = [[0, 0], [0, 0], [ksize // 2 - 1, ksize // 2], [0, 0]]
        else:
            paddings = [[0, 0], [0, 0], [ksize // 2, ksize // 2], [0, 0]]
    inputs = pad_cyclic(inputs, paddings)

    return inputs


def get_W_b_conv1d(in_channel, out_channel, ksize, dtype=None):
    if dtype is None:
        dtype = tf.float32

    fanin = in_channel * ksize
    W = tf.get_variable(
        "weights", shape=[1, ksize, in_channel, out_channel], dtype=dtype,
        initializer=tf.truncated_normal_initializer(stddev=2.0 / fanin),
        # initializer=tf.random_normal_initializer(stddev=0.02),
    )
    b = tf.get_variable(
        "biases", shape=[out_channel], dtype=dtype,
        initializer=tf.zeros_initializer(),
    )
    tf.summary.histogram("W", W)
    tf.summary.histogram("b", b)

    return W, b


def conv1d_layer(inputs, ksize, nchannel, activation_fn, perform_bn,
                 perform_gcn, is_training, perform_kron=False,
                 padding="CYCLIC", data_format="NCHW", act_pos="post"):
    assert act_pos == "pre" or act_pos == "post"

    # Pad manually
    if padding == "CYCLIC":
        if ksize > 1:
            inputs = conv1d_pad_cyclic(
                inputs, ksize, 1, data_format=data_format)
        cur_padding = "VALID"
    else:
        cur_padding = padding

    in_shp = tf_get_shape_as_list(inputs)
    if data_format == "NHWC":
        in_channel = in_shp[-1]
    else:
        in_channel = in_shp[1]
    assert len(in_shp) == 4

    self_ksize = ksize

    # If pre-activation
    if act_pos == "pre":
        inputs = bn_act(inputs, perform_gcn, perform_bn, activation_fn,
                        is_training, data_format)

    # Normal convolution
    with tf.variable_scope("self-conv"):
        W, b = get_W_b_conv1d(in_channel, nchannel, self_ksize)
        # Convolution in the valid region only
        linout = tf.nn.conv2d(
            inputs, W, [1, 1, 1, 1], cur_padding, data_format=data_format)
        linout = tf.nn.bias_add(linout, b, data_format=data_format)

    # If post-activation
    output = linout
    if act_pos == "post":
        output = bn_act(linout, perform_gcn, perform_bn, activation_fn,
                        is_training, data_format)

    return output

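
# ------------------------------------------------------------
# Illustrative sketch (hypothetical helper, not from the original code):
# how conv1d_pad_cyclic wraps the point dimension so that the subsequent
# VALID convolution inside conv1d_layer behaves cyclically.
def _demo_cyclic_pad():
    # NCHW input: batch 1, 1 channel, height 1, 5 "points"
    x = tf.reshape(tf.range(5, dtype=tf.float32), (1, 1, 1, 5))
    padded = conv1d_pad_cyclic(x, ksize=3, numconv=1, data_format="NCHW")
    with tf.Session() as sess:
        # ksize=3 wraps one point on each side:
        # [[[[4. 0. 1. 2. 3. 4. 0.]]]]
        return sess.run(padded)
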
def conv1d_resnet_block(inputs, ksize, nchannel, activation_fn, is_training,
                        midchannel=None, perform_bn=False, perform_gcn=False,
                        padding="CYCLIC", act_pos="post", data_format="NCHW"):
    # In case we want to do a bottleneck layer
    if midchannel is None:
        midchannel = nchannel

    # Don't activate conv1 in case of midact
    conv1_act_fn = activation_fn
    if act_pos == "mid":
        conv1_act_fn = None
        act_pos = "pre"

    # Pass branch
    with tf.variable_scope("pass-branch"):
        # Passthrough to be used when num_outputs != num_inputs
        in_shp = tf_get_shape_as_list(inputs)
        if data_format == "NHWC":
            in_channel = in_shp[-1]
        else:
            in_channel = in_shp[1]
        if in_channel != nchannel:
            cur_in = inputs
            # Simply change channels through a 1x1 conv
            with tf.variable_scope("conv"):
                cur_in = conv1d_layer(
                    inputs=inputs,
                    ksize=1,
                    nchannel=nchannel,
                    activation_fn=None,
                    perform_bn=False,
                    perform_gcn=False,
                    is_training=is_training,
                    padding=padding,
                    data_format=data_format,
                )
            orig_inputs = cur_in
        else:
            orig_inputs = inputs

    # Conv branch
    with tf.variable_scope("conv-branch"):
        cur_in = inputs

        # Do the bottleneck if necessary (linear)
        if midchannel != nchannel:
            with tf.variable_scope("preconv"):
                cur_in = conv1d_layer(
                    inputs=cur_in,
                    ksize=1,
                    nchannel=nchannel,
                    activation_fn=None,
                    perform_bn=False,
                    perform_gcn=False,
                    is_training=is_training,
                    padding=padding,
                    data_format=data_format,
                )
                cur_in = activation_fn(cur_in)

        # Main convolution
        with tf.variable_scope("conv1"):
            # Right branch
            cur_in = conv1d_layer(
                inputs=cur_in,
                ksize=ksize,
                nchannel=nchannel,
                activation_fn=conv1_act_fn,
                perform_bn=perform_bn,
                perform_gcn=perform_gcn,
                is_training=is_training,
                padding=padding,
                act_pos=act_pos,
                data_format=data_format,
            )

        # Main convolution
        with tf.variable_scope("conv2"):
            # Right branch
            cur_in = conv1d_layer(
                inputs=cur_in,
                ksize=ksize,
                nchannel=nchannel,
                activation_fn=activation_fn,
                perform_bn=perform_bn,
                perform_gcn=perform_gcn,
                is_training=is_training,
                padding=padding,
                act_pos=act_pos,
                data_format=data_format,
            )

        # Undo the bottleneck if necessary (linear)
        if midchannel != nchannel:
            with tf.variable_scope("postconv"):
                cur_in = conv1d_layer(
                    inputs=cur_in,
                    ksize=1,
                    nchannel=nchannel,
                    activation_fn=None,
                    perform_bn=False,
                    perform_gcn=False,
                    is_training=is_training,
                    padding=padding,
                    data_format=data_format,
                )
                cur_in = activation_fn(cur_in)

    # Crop the pass branch accordingly if VALID padding shrank the conv branch
    if padding == "VALID" and ksize > 1:
        if np.mod(ksize, 2) == 0:
            crop_st = ksize // 2 - 1
        else:
            crop_st = ksize // 2
        crop_ed = ksize // 2
        if data_format == "NHWC":
            orig_inputs = orig_inputs[:, :, crop_st:-crop_ed, :]
        else:
            orig_inputs = orig_inputs[:, :, :, crop_st:-crop_ed]

    return cur_in + orig_inputs


def linear(input_, outputSize, activation_fn=None, name='linear'):
    shape = input_.get_shape().as_list()

    with tf.variable_scope(name):
        w = tf.get_variable('w_linear', [shape[1], outputSize], tf.float32,
                            tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable('bias', [outputSize],
                            initializer=tf.constant_initializer(0.0))
        out = tf.matmul(input_, w) + b

    if activation_fn is not None:
        return activation_fn(out)
    return out

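
# ------------------------------------------------------------
# Illustrative sketch (hypothetical shapes and names, not from the
# original code): wiring a single conv1d_resnet_block over a batch of N
# correspondences laid out as B x 1 x N x C in NHWC.
def _demo_resnet_block():
    x = tf.placeholder(tf.float32, (None, 1, None, 128))
    is_training = tf.placeholder(tf.bool, ())
    with tf.variable_scope("block-0"):
        y = conv1d_resnet_block(
            inputs=x,
            ksize=1,
            nchannel=128,
            activation_fn=tf.nn.relu,
            is_training=is_training,
            perform_bn=True,
            perform_gcn=True,
            data_format="NHWC",
        )
    return y  # same shape as x: B x 1 x N x 128
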
def conv2d(x, outputDim, patchSize, stride, activation_fn=tf.nn.relu,
           padding='VALID', name='conv2d'):
    with tf.variable_scope(name):
        s = [1, stride[0], stride[1], 1]
        kernelShape = [patchSize, patchSize,
                       x.get_shape().as_list()[-1], outputDim]
        w = tf.get_variable(
            'w', kernelShape, tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv = tf.nn.conv2d(x, w, s, padding)
        b = tf.get_variable('bias', [outputDim],
                            initializer=tf.constant_initializer(0.0))
        out = activation_fn(conv + b)
    return out, w, b


def resnet_reg(cur_input, nb_channels, patchSize, stride, is_training,
               data_format="NCHW", padding='SAME', activation_fn=tf.nn.relu,
               name='resblock-reg'):
    with tf.variable_scope(name):
        output, w1, b1 = conv2d(cur_input, nb_channels, patchSize, stride,
                                padding=padding, name='res1')
        output = bn_act(output, False, True, None, is_training, data_format)
        output, w2, b2 = conv2d(output, nb_channels, patchSize, stride,
                                padding=padding, name='res2')
        output = bn_act(output, False, True, None, is_training, data_format)
        output = tf.add(output, cur_input)
    return output


def regression_layer(cur_input, nb_channels, patch, stride, nb_fc,
                     perform_gcn, perform_bn, activation_fn, is_training,
                     representation, data_format="NCHW"):
    cur_input = bn_act(cur_input, perform_gcn, perform_bn, activation_fn,
                       is_training, data_format)
    cur_input = tf.expand_dims(cur_input, axis=3)

    outputs, w, b = conv2d(cur_input, nb_channels, patch, stride)

    # Flatten all but the batch dimension before the fully connected layers
    shape = outputs.get_shape()
    num_features = shape[1:4].num_elements()
    outputs = tf.reshape(outputs, [-1, num_features])

    l1 = linear(outputs, nb_fc, activation_fn=tf.nn.relu, name='linear_1')

    if representation == 'lie':
        Rt_hat = linear(l1, 6, name='Rt_hat')
        R_hat = tf.transpose(tf.stack([Rt_hat[:, 0], Rt_hat[:, 1],
                                       Rt_hat[:, 2]]))
        t_hat = tf.transpose(tf.stack([Rt_hat[:, 3], Rt_hat[:, 4],
                                       Rt_hat[:, 5]]))
    elif representation == 'quat':
        Rt_hat = linear(l1, 7, name='Rt_hat')
        R_hat = tf.transpose(tf.stack([Rt_hat[:, 0], Rt_hat[:, 1],
                                       Rt_hat[:, 2], Rt_hat[:, 3]]))
        t_hat = tf.transpose(tf.stack([Rt_hat[:, 4], Rt_hat[:, 5],
                                       Rt_hat[:, 6]]))
    elif representation == 'linear':
        Rt_hat = linear(l1, 12, name='Rt_hat')
        R_hat = tf.transpose(tf.stack([Rt_hat[:, 0], Rt_hat[:, 1],
                                       Rt_hat[:, 2], Rt_hat[:, 3],
                                       Rt_hat[:, 4], Rt_hat[:, 5],
                                       Rt_hat[:, 6], Rt_hat[:, 7],
                                       Rt_hat[:, 8]]))
        t_hat = tf.transpose(tf.stack([Rt_hat[:, 9], Rt_hat[:, 10],
                                       Rt_hat[:, 11]]))
    else:
        raise ValueError(
            'Unknown rotation representation: {}'.format(representation))

    return R_hat, t_hat


def globalmax_pool1d(inputs):
    with tf.variable_scope('max_pool'):
        outputs = tf.reduce_max(inputs, axis=2)
    return outputs


def globalmean_pool1d(inputs):
    with tf.variable_scope('mean_pool'):
        outputs = tf.reduce_mean(inputs, axis=2)
    return outputs


def tf_matrix_vector_mul(M, v):
    """Apply a per-batch 3x3 matrix M (B x 3 x 3) to points v (B x N x 3)."""
    sh = tf.shape(v)
    M = tf.expand_dims(M, 1)
    M_ = tf.tile(M, [1, sh[1], 1, 1])
    m0 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 0, :], v), axis=2),
                    (sh[0], sh[1], 1))
    m1 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 1, :], v), axis=2),
                    (sh[0], sh[1], 1))
    m2 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 2, :], v), axis=2),
                    (sh[0], sh[1], 1))
    T = tf.concat([m0, m1, m2], axis=2)
    return T

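
# ------------------------------------------------------------
# Illustrative sketch (hypothetical data, not from the original code):
# using tf_matrix_vector_mul to apply one rotation per batch element to
# a batch of point sets.
def _demo_rotate_points():
    R = tf.constant(np.eye(3, dtype=np.float32)[None])       # 1 x 3 x 3
    pts = tf.constant(np.random.rand(1, 100, 3).astype(np.float32))
    rotated = tf_matrix_vector_mul(R, pts)                    # 1 x 100 x 3
    with tf.Session() as sess:
        # Identity rotation: output equals the input points
        return sess.run(rotated)
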
def tf_matrix4_vector_mul(M, v):
    """Apply a per-batch 4x4 matrix M (B x 4 x 4) to points v (B x N x 4)."""
    sh = tf.shape(v)
    M = tf.expand_dims(M, 1)
    M_ = tf.tile(M, [1, sh[1], 1, 1])
    m0 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 0, :], v), axis=2),
                    (sh[0], sh[1], 1))
    m1 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 1, :], v), axis=2),
                    (sh[0], sh[1], 1))
    m2 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 2, :], v), axis=2),
                    (sh[0], sh[1], 1))
    m3 = tf.reshape(tf.reduce_sum(tf.multiply(M_[:, :, 3, :], v), axis=2),
                    (sh[0], sh[1], 1))
    T = tf.concat([m0, m1, m2, m3], axis=2)
    return T


def tf_add_vectors(v, u):
    """Broadcast-add a per-batch vector u (B x D) to points v (B x N x D)."""
    sh = tf.shape(v)
    u = tf.expand_dims(u, 1)
    u_ = tf.tile(u, [1, sh[1], 1])
    y = tf.add(v, u_)
    return y


def tf_mul_vectors(u, v):
    """Outer product of two batched vectors via a transposed matmul."""
    y = tf.matmul(u, v, transpose_a=True)
    return y


def geman_mcclure(x, alpha=10.):
    """Geman-McClure robust loss on the per-point residual norms."""
    y = tf.norm(x, axis=2)
    sh = tf.shape(x)
    alpha = tf.constant(alpha, shape=[1, 1])
    alpha = tf.tile(alpha, [sh[0], sh[1]])
    l = tf.square(y) / 2
    l = l / (tf.square(alpha) + tf.square(y))
    return l


def l1(x):
    """L1 loss on the per-point residual norms."""
    return tf.abs(tf.norm(x, axis=2))


def l2(x):
    """Squared L2 loss on the per-point residuals."""
    return tf.reduce_sum(tf.square(x), axis=2)


def l05(x):
    """L0.5 loss on the per-point residual norms."""
    return 2 * tf.sqrt(tf.abs(tf.norm(x, axis=2)))


def np_matrix4_vector_mul(M, v):
    """NumPy version: apply a single 4x4 matrix M to points v (N x 4)."""
    sh = v.shape
    M = np.expand_dims(M, 0)
    M_ = np.tile(M, [sh[0], 1, 1])
    m0 = np.reshape(np.sum(np.multiply(M_[:, 0, :], v), axis=1), (sh[0], 1))
    m1 = np.reshape(np.sum(np.multiply(M_[:, 1, :], v), axis=1), (sh[0], 1))
    m2 = np.reshape(np.sum(np.multiply(M_[:, 2, :], v), axis=1), (sh[0], 1))
    m3 = np.reshape(np.sum(np.multiply(M_[:, 3, :], v), axis=1), (sh[0], 1))
    T = np.concatenate([m0, m1, m2, m3], axis=1)
    return T

#
# ops.py ends here