The following are 30 code examples of tensorflow.reshape(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example #1
Source File:    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def batch_random_flip(input_):
    """Randomly mirror each image of a batch left-to-right.

    Python scalars (``int`` / ``float``) are returned unchanged. Otherwise
    the static shape of ``input_`` is read as
    ``[batch, height, width, channels]`` and the flip op is applied to every
    image separately before the batch is reassembled.
    """
    if isinstance(input_, (int, float)):
        return input_
    dims = input_.get_shape().as_list()
    n_images, rows, cols, depth = dims[0], dims[1], dims[2], dims[3]
    # One [1, H, W, C] slab per image in the batch.
    slabs = tf.split(axis=0, num_or_size_splits=n_images, value=input_)
    # Drop the singleton batch axis so the flip op receives a 3-D image.
    images = [slab[0, :, :, :] for slab in slabs]
    flipped = [tf.image.random_flip_left_right(image) for image in images]
    # Restore the batch axis before stitching the images back together.
    restored = [tf.reshape(image, [1, rows, cols, depth]) for image in flipped]
    return tf.concat(axis=0, values=restored)

# build a one hot representation corresponding to the integer tensor
# the one-hot dimension is appended to the integer tensor shape 
Example #2
Source File:    From Adversarial-Face-Attack with GNU General Public License v3.0 6 votes vote down vote up
def build_pgd_attack(self, eps):
        """Build the iterative attack graph and return the adversarial batch.

        :param eps: maximum per-pixel perturbation (pixel units, the bounds
            below are clipped to [0, 255]).
        :returns: the adversarial image tensor, also stored in
            ``self.adv_image``.
        """
        victim_embeddings = tf.constant(self.victim_embeddings, dtype=tf.float32)

        def one_step_attack(image, grad):
            """
            core components of this attack are:
            (a) PGD adversarial attack (https://arxiv.org/pdf/1706.06083.pdf)
            (b) momentum (https://arxiv.org/pdf/1710.06081.pdf)
            (c) input diversity (https://arxiv.org/pdf/1803.06978.pdf)
            """
            orig_image = image
            image = self.structure(image)
            image = (image - 127.5) / 128.0
            # Small uniform noise on the normalized input.
            image = image + tf.random_uniform(tf.shape(image), minval=-1e-2, maxval=1e-2)
            # NOTE(review): the call target was lost in extraction; the
            # surviving arguments match `self.network.inference(image, ...)`
            # -- confirm against the upstream project.
            prelogits, _ = self.network.inference(image, 1.0, False, bottleneck_layer_size=512)
            embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

            embeddings = tf.reshape(embeddings[0], [512, 1])
            objective = tf.reduce_mean(tf.matmul(victim_embeddings, embeddings))  # to be maximized

            noise, = tf.gradients(objective, orig_image)

            # Normalize the gradient by its mean magnitude, then accumulate
            # it into the running (momentum) direction.
            noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True)
            noise = 0.9 * grad + noise

            # Unit signed step, projected back into the eps-ball around the input.
            adv = tf.clip_by_value(orig_image + tf.sign(noise) * 1.0, lower_bound, upper_bound)
            return adv, noise

        input = tf.to_float(self.image_batch)
        lower_bound = tf.clip_by_value(input - eps, 0, 255.)
        upper_bound = tf.clip_by_value(input + eps, 0, 255.)

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            # NOTE(review): the trailing while_loop arguments were lost in
            # extraction. The loop condition is constantly True, so an
            # iteration cap is required for termination; the values below
            # are reconstructed and must be confirmed against upstream.
            adv, _ = tf.while_loop(
                lambda _, __: True, one_step_attack,
                (input, tf.zeros_like(input)),
                back_prop=False,
                maximum_iterations=100,
                parallel_iterations=1)
        self.adv_image = adv
        return adv
Example #3
Source File:    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def create_test_input(batch_size, height, width, channels):
  """Create test input tensor.

  Args:
    batch_size: The number of images per batch or `None` if unknown.
    height: The height of each image or `None` if unknown.
    width: The width of each image or `None` if unknown.
    channels: The number of channels per image or `None` if unknown.

  Returns:
    Either a placeholder `Tensor` of dimension
      [batch_size, height, width, channels] if any of the inputs are `None` or a
    constant `Tensor` with the mesh grid values along the spatial dimensions.
  """
  if None in [batch_size, height, width, channels]:
    return tf.placeholder(tf.float32, (batch_size, height, width, channels))
  else:
    # grid[i, j] = i + j, broadcast from a column of row indices and a row
    # of column indices, then tiled over the batch and channel axes.
    return tf.to_float(
        np.tile(
            np.reshape(
                np.reshape(np.arange(height), [height, 1]) +
                np.reshape(np.arange(width), [1, width]),
                [1, height, width, 1]),
            [batch_size, 1, 1, channels]))
Example #4
Source File:    From ARU-Net with GNU General Public License v2.0 6 votes vote down vote up
def sequence_to_images(tensor, num_batches):
  """Convert a batch of sequences into a batch of images.

  Args:
    tensor: (num_steps, num_batchesRNN, depth) sequence tensor
    num_batches: the number of image batches

  Returns:
    (num_batches, height, width, depth) tensor
  """
  shapeT = tf.shape(tensor)
  shapeL = tensor.get_shape().as_list()
  # The RNN batch axis holds num_batches * height rows; recover height.
  height = tf.to_int32(shapeT[1] / num_batches)
  # NOTE(review): the entries of this stack were lost in extraction; they are
  # the only ordering consistent with the transpose below and the documented
  # output shape -- confirm against upstream.
  n_shape = tf.stack([
      shapeT[0],
      num_batches,
      height,
      shapeL[2]
  ])

  reshaped = tf.reshape(tensor, n_shape)
  # (steps, batches, height, depth) -> (batches, height, steps, depth)
  return tf.transpose(reshaped, [1, 2, 0, 3])
Example #5
Source File:    From ARU-Net with GNU General Public License v2.0 6 votes vote down vote up
def images_to_sequence(tensor):
  """Convert a batch of images into a batch of sequences.

  Args:
    tensor: a (num_images, height, width, depth) tensor

  Returns:
    (width, num_images*height, depth) sequence tensor
  """
  # (images, height, width, depth) -> (width, images, height, depth)
  transposed = tf.transpose(tensor, [2, 0, 1, 3])

  shapeT = tf.shape(transposed)
  shapeL = transposed.get_shape().as_list()
  # NOTE(review): the entries of this stack were lost in extraction; they are
  # rebuilt from the documented output shape -- confirm against upstream.
  # The depth is taken from the static shape, so it must be known.
  n_shape = tf.stack([
      shapeT[0],
      shapeT[1] * shapeT[2],
      shapeL[3]
  ])
  reshaped = tf.reshape(transposed, n_shape)
  return reshaped
Example #6
Source File:    From spleeter with MIT License 6 votes vote down vote up
def _inverse_stft(self, stft_t, time_crop=None):
        """ Inverse and reshape the given STFT

        :param stft_t: input STFT
        :param time_crop: (optional) number of leading rows of the result to
            keep; defaults to the first dimension of
            ``self._features['waveform']``.
        :returns: inverse STFT (waveform)
        """
        # NOTE(review): the frame length / frame step arguments and the text
        # after the window lambda were lost in extraction. They are rebuilt
        # from the upstream project (including the window compensation
        # factor) and must be confirmed before use.
        inversed = inverse_stft(
            tf.transpose(stft_t, perm=[2, 0, 1]),
            self._frame_length,
            self._frame_step,
            window_fn=lambda frame_length, dtype: (
                hann_window(frame_length, periodic=True, dtype=dtype))
        ) * self.WINDOW_COMPENSATION_FACTOR
        reshaped = tf.transpose(inversed)
        if time_crop is None:
            time_crop = tf.shape(self._features['waveform'])[0]
        return reshaped[:time_crop, :]
Example #7
Source File:    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def train_lr_rfeinman(densities_pos, densities_neg, uncerts_pos, uncerts_neg):
    """Fit a cross-validated logistic regression on (density, uncertainty) pairs.

    :param densities_pos: density scores of the positive samples
    :param densities_neg: density scores of the negative samples
    :param uncerts_pos: uncertainty scores of the positive samples
    :param uncerts_neg: uncertainty scores of the negative samples
    :return: ``(values, labels, lr)`` where ``values`` has one
        ``[density, uncertainty]`` row per sample (negatives first),
        ``labels`` is 0 for negatives and 1 for positives, and ``lr`` is the
        fitted ``LogisticRegressionCV`` model.
    """
    # NOTE(review): labels are built with zeros_like/ones_like on the density
    # arrays, so those are presumably 1-D -- confirm against callers.
    values_neg = np.concatenate(
        (densities_neg.reshape((1, -1)),
         uncerts_neg.reshape((1, -1))),
        axis=0).transpose([1, 0])
    values_pos = np.concatenate(
        (densities_pos.reshape((1, -1)),
         uncerts_pos.reshape((1, -1))),
        axis=0).transpose([1, 0])

    values = np.concatenate((values_neg, values_pos))
    labels = np.concatenate(
        (np.zeros_like(densities_neg), np.ones_like(densities_pos)))

    lr = LogisticRegressionCV(n_jobs=-1).fit(values, labels)

    return values, labels, lr
Example #8
Source File:    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fprop(self, x):
        """Run the forward pass on ``x`` and return the intermediate tensors.

        The result is built from ``locals()``, so its keys are the local
        variable names of this method (minus ``self``) plus
        ``self.O_PROBS`` for the softmax output. Renaming or removing a
        local therefore changes the returned keys.
        """

        # Overwritten below, but still captured by locals() under "output".
        output = OrderedDict()
        # first convolutional layer
        h_conv1 = tf.nn.relu(self._conv2d(x, self.W_conv1) + self.b_conv1)
        h_pool1 = self._max_pool_2x2(h_conv1)

        # second convolutional layer
        h_conv2 = tf.nn.relu(
            self._conv2d(h_pool1, self.W_conv2) + self.b_conv2)
        h_pool2 = self._max_pool_2x2(h_conv2)

        # first fully connected layer

        # Flatten each example to a vector of 7 * 7 * 64 values.
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, self.W_fc1) + self.b_fc1)

        # output layer
        logits = tf.matmul(h_fc1, self.W_fc2) + self.b_fc2

        # NOTE(review): deterministic_dict presumably returns an ordered copy
        # of the mapping -- confirm against its definition.
        output = deterministic_dict(locals())
        del output["self"]
        output[self.O_PROBS] = tf.nn.softmax(logits=logits)

        return output 
Example #9
Source File:    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def depool_2x2(input_, stride=2):
    """Upsample by ``stride`` in height and width, filling with zeros.

    The static shape of ``input_`` is read as
    ``[batch, height, width, channels]``. Each input value is followed by
    ``stride - 1`` zeros along both spatial axes, giving an output of shape
    ``[batch, stride * height, stride * width, channels]``.
    """
    shape = input_.get_shape().as_list()
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]
    channels = shape[3]
    # Insert singleton axes after height and after width to pad into.
    res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
    res = tf.concat(
        axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
    res = tf.concat(axis=4, values=[
        res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
    ])
    res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])

    return res

# random flip on a batch of images 
Example #10
Source File:    From Traffic_sign_detection_YOLO with MIT License 6 votes vote down vote up
def _forward(self):
        """Fold every ``s x s`` spatial box of the input into the channel axis.

        Reads ``self.inp.out`` and ``self.lay.stride`` and stores the result
        in ``self.out``; each box becomes one ``[-1, 1, 1, c*s*s]`` cell.
        """
        source = self.inp.out
        _, h, w, c = source.get_shape().as_list()
        s = self.lay.stride
        rows = []
        for i in range(int(h/s)):
            top = s * i
            # Cells of one output row, concatenated along the width axis.
            cells = [
                tf.reshape(source[:, top: top+s, s * j: s * j+s, :], [-1,1,1,c*s*s])
                for j in range(int(w/s))
            ]
            rows.append(tf.concat(cells, 2))

        # Rows are concatenated along the height axis.
        self.out = tf.concat(rows, 1)
Example #11
Source File:    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def preprocess_batch(images_batch, preproc_func=None):
    """
    Creates a preprocessing graph for a batch given a function that processes
    a single image.

    :param images_batch: A tensor for an image batch. Its first dimension
        must be statically known when ``preproc_func`` is given.
    :param preproc_func: (optional function) A function that takes in a
        tensor and returns a preprocessed input.
    :return: ``images_batch`` itself when ``preproc_func`` is None, otherwise
        the processed images concatenated along axis 0.
    """
    if preproc_func is None:
        return images_batch

    with tf.variable_scope('preprocess'):
        images_list = tf.split(images_batch, int(images_batch.shape[0]))
        result_list = []
        for img in images_list:
            # Drop the leading singleton batch axis before processing.
            reshaped_img = tf.reshape(img, img.shape[1:])
            processed_img = preproc_func(reshaped_img)
            result_list.append(tf.expand_dims(processed_img, axis=0))
        result_images = tf.concat(result_list, axis=0)
    return result_images
Example #12
Source File:    From spleeter with MIT License 6 votes vote down vote up
def pad_and_reshape(instr_spec, frame_length, F):
    """Zero-pad axis 2 up to ``frame_length // 2 + 1`` and merge axes 0 and 1.

    :param instr_spec: 4-D tensor; axis 2 is the one that gets padded.
    :param frame_length: frame length; the padded size of axis 2 is
        ``frame_length // 2 + 1``.
    :param F: current size of axis 2 of ``instr_spec`` (presumably the
        number of frequency bins kept by the model -- confirm with callers).
    :returns: the padded tensor with its first two axes flattened into one.
    """
    spec_shape = tf.shape(instr_spec)
    extension_row = tf.zeros((spec_shape[0], spec_shape[1], 1, spec_shape[-1]))
    n_extra_row = (frame_length) // 2 + 1 - F
    extension = tf.tile(extension_row, [1, 1, n_extra_row, 1])
    extended_spec = tf.concat([instr_spec, extension], axis=2)
    old_shape = tf.shape(extended_spec)
    # NOTE(review): the second operand and the axis argument were lost in
    # extraction; they are restored to match the identical pattern used for
    # mask reshaping in the same project -- confirm against upstream.
    new_shape = tf.concat([
        [old_shape[0] * old_shape[1]],
        old_shape[2:]],
        axis=0)
    processed_instr_spec = tf.reshape(extended_spec, new_shape)
    return processed_instr_spec
Example #13
Source File:    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def get_image_feature(self, char_index):
    """Returns a subset of image features for a character.

    Args:
      char_index: an index of a character.

    Returns:
      A tensor with shape [batch_size, ?]. The output depth depends on the
      depth of input net.
    """
    batch_size, features_num, _ = [d.value for d in self._net.get_shape()]
    slice_len = int(features_num / self._params.seq_length)
    # In case when features_num != seq_length, we just pick a subset of image
    # features, this choice is arbitrary and there is no intuitive geometrical
    # interpretation. If features_num is not dividable by seq_length there will
    # be unused image features.
    net_slice = self._net[:, char_index:char_index + slice_len, :]
    feature = tf.reshape(net_slice, [batch_size, -1])
    logging.debug('Image feature: %s', feature)
    return feature
Example #14
Source File:    From spleeter with MIT License 5 votes vote down vote up
def _build_masks(self):
        """
        Compute masks from the output spectrograms of the model.

        Reads ``self.model_outputs``, ``self.stft_feature`` and
        ``self._params['separation_exponent']``; stores a dict mapping each
        instrument name to its mask tensor in ``self._masks``.
        """
        output_dict = self.model_outputs
        stft_feature = self.stft_feature
        separation_exponent = self._params['separation_exponent']
        # NOTE(review): the axis arguments of reduce_sum and concat were lost
        # in extraction. Summing over axis 0 of the stacked list adds the
        # instruments element-wise, which is what the ratio below needs --
        # confirm against upstream.
        output_sum = tf.reduce_sum(
            [e ** separation_exponent for e in output_dict.values()],
            axis=0
        ) + self.EPSILON
        out = {}
        for instrument in self._instruments:
            output = output_dict[f'{instrument}_spectrogram']
            # Compute mask with the model.
            instrument_mask = (output ** separation_exponent
                               + (self.EPSILON / len(output_dict))) / output_sum
            # Extend mask;
            instrument_mask = self._extend_mask(instrument_mask)
            # Stack back mask.
            old_shape = tf.shape(instrument_mask)
            new_shape = tf.concat(
                [[old_shape[0] * old_shape[1]], old_shape[2:]],
                axis=0)
            instrument_mask = tf.reshape(instrument_mask, new_shape)
            # Remove padded part (for mask having the same size as STFT);

            instrument_mask = instrument_mask[
                              :tf.shape(stft_feature)[0], ...]
            out[instrument] = instrument_mask
        self._masks = out
Example #15
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def as_one_hot(input_, n_indices):
    """Convert indices to one-hot.

    The static shape of ``input_`` must be fully known. The result has that
    shape with a trailing axis of size ``n_indices`` appended.
    """
    shape = input_.get_shape().as_list()
    # NOTE(review): the right-hand side was lost in extraction; the total
    # element count is the only value consistent with its uses below.
    n_elem = np.prod(shape)
    indices = tf.range(n_elem)
    indices = tf.cast(indices, tf.int64)
    # Pair every flat position with its class index: rows of (position, class).
    indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])])
    indices_input = tf.reshape(indices_input, [2, -1])
    indices_input = tf.transpose(indices_input)
    res = tf.sparse_to_dense(
        indices_input, [n_elem, n_indices], 1., 0., name="flat_one_hot")
    res = tf.reshape(res, list(shape) + [n_indices])

    return res
Example #16
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def squeeze_2x2(input_):
    """Space-to-channel squeeze with a fixed factor of 2 (see squeeze_nxn)."""
    factor = 2
    return squeeze_nxn(input_, n_factor=factor)
Example #17
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def extract_data(filename, num_images):
  """Extract the images into a 4D tensor [image index, y, x, channels].

  Values are rescaled from [0, 255] down to [-0.5, 0.5].
  """
  # Local import keeps this snippet self-contained.
  import gzip

  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    # NOTE(review): the open/read calls were lost in extraction. The upstream
    # MNIST loader skips the 16-byte IDX header before reading pixel data --
    # confirm this matches the files being read.
    bytestream.read(16)
    buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
    data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
    data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
    data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
    return data
Example #18
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def squeeze_nxn(input_, n_factor=2):
    """Squeezing operation: reshape to convert space to channels.

    Python scalars (``int`` / ``float``) are returned unchanged. Otherwise
    the static shape of ``input_`` is read as
    ``[batch, height, width, channels]`` and the result has shape
    ``[batch, height // n, width // n, channels * n * n]``.

    Raises:
        ValueError: if height or width is not divisible by ``n_factor``.
    """
    if isinstance(input_, (float, int)):
        return input_
    shape = input_.get_shape().as_list()
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]
    channels = shape[3]
    if height % n_factor != 0:
        raise ValueError("Height not divisible by %d." % n_factor)
    if width % n_factor != 0:
        raise ValueError("Width not divisible by %d." % n_factor)
    # Split each spatial axis into (blocks, offset-within-block).
    res = tf.reshape(
        input_,
        [batch_size,
         height // n_factor,
         n_factor, width // n_factor,
         n_factor, channels])
    # Move the two within-block offsets behind the channel axis.
    res = tf.transpose(res, [0, 1, 3, 5, 2, 4])
    res = tf.reshape(
        res,
        [batch_size,
         height // n_factor,
         width // n_factor,
         channels * n_factor * n_factor])

    return res
Example #19
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def orthogonal_initializer(shape, dtype=tf.float32, *args, **kwargs):
  """Generates orthonormal matrices with random values.

  Orthonormal initialization is important for RNNs.

  For non-square shapes the returned matrix will be semi-orthonormal: if the
  number of columns exceeds the number of rows, then the rows are orthonormal
  vectors; but if the number of rows exceeds the number of columns, then the
  columns are orthonormal vectors.

  We use SVD decomposition to generate an orthonormal matrix with random
  values. The same way as it is done in the Lasagne library for Theano. Note
  that both u and v returned by the svd are orthogonal and random. We just need
  to pick one with the right shape.

  Args:
    shape: a shape of the tensor matrix to initialize.
    dtype: a dtype of the initialized tensor.
    *args: not used.
    **kwargs: not used.

  Returns:
    An initialized tensor.
  """
  del args
  del kwargs
  # Collapse all trailing dimensions so the SVD sees a 2-D matrix.
  flat_shape = (shape[0], np.prod(shape[1:]))
  w = np.random.randn(*flat_shape)
  u, _, v = np.linalg.svd(w, full_matrices=False)
  w = u if u.shape == flat_shape else v
  return tf.constant(w.reshape(shape), dtype=dtype)
Example #20
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def encode_coordinates_alt(self, net):
    """An alternative implementation for the encoding coordinates.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]

    Returns:
      a list of tensors with encoded image coordinates in them.
    """
    batch_size, h, w, _ = net.shape.as_list()
    # NOTE(review): the outer tile/reshape wrappers and the closing brackets
    # of both comprehensions were lost in extraction; they are rebuilt from
    # the surviving argument lists -- confirm against upstream.
    h_loc = [
        tf.tile(
            tf.reshape(
                tf.contrib.layers.one_hot_encoding(
                    tf.constant([i]), num_classes=h), [h, 1]), [1, w])
        for i in range(h)
    ]
    h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
    w_loc = [
        tf.tile(
            tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
            [h, 1]) for i in range(w)
    ]
    w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
    loc = tf.concat([h_loc, w_loc], 2)
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
Example #21
Source File:    From ARU-Net with GNU General Public License v2.0 5 votes vote down vote up
def horizontal_cell(images, num_filters_out, cell_fw, cell_bw, keep_prob=1.0, scope=None):
  """Run the given RNN cells bidirectionally over all the rows of each image.

  Args:
    images: (num_images, height, width, depth) tensor
    num_filters_out: output depth
    cell_fw: forward RNN cell
    cell_bw: backward RNN cell
    keep_prob: output keep probability for the dropout wrappers
    scope: optional scope name

  Returns:
    (num_images, height, width, num_filters_out) tensor
  """
  with tf.variable_scope(scope, "HorizontalGru", [images]):
    sequence = images_to_sequence(images)

    shapeT = tf.shape(sequence)
    sequence_length = shapeT[0]
    batch_sizeRNN = shapeT[1]
    # Every row is a full-length sequence.
    sequence_lengths = tf.to_int64(
      tf.fill([batch_sizeRNN], sequence_length))
    forward_drop1 = DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
    backward_drop1 = DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
    rnn_out1, _ = tf.nn.bidirectional_dynamic_rnn(forward_drop1, backward_drop1, sequence, dtype=tf.float32,
                                                  sequence_length=sequence_lengths, time_major=True,
                                                  swap_memory=True, scope=scope)
    # Sum the forward and backward outputs instead of concatenating them.
    rnn_out1 = tf.concat(rnn_out1, 2)
    rnn_out1 = tf.reshape(rnn_out1, shape=[-1, batch_sizeRNN, 2, num_filters_out])
    output_sequence = tf.reduce_sum(rnn_out1, axis=2)
    # The number of image batches is the leading dimension of the input;
    # the original listing used this name without defining it.
    batch_size = tf.shape(images)[0]
    output = sequence_to_images(output_sequence, batch_size)
    return output
Example #22
Source File:    From deep-learning-note with MIT License 5 votes vote down vote up
def get_data(self):
        """Create the input pipeline shared by the train and test datasets.

        Sets ``self.img``, ``self.label``, ``self.train_init`` and
        ``self.test_init``.
        """
        with tf.name_scope('data'):
            train_data, test_data = utils.get_mnist_dataset(self.batch_size)
            # NOTE(review): the right-hand side was lost in extraction. A
            # structure-based iterator is required for the two
            # make_initializer calls below -- confirm against upstream.
            iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                       train_data.output_shapes)
            img, self.label = iterator.get_next()
            # reshape the image to make it work with tf.nn.conv2d
            self.img = tf.reshape(img, shape=[-1, 28, 28, 1])

            self.train_init = iterator.make_initializer(train_data)  # initializer for train_data
            self.test_init = iterator.make_initializer(test_data)  # initializer for test_data
Example #23
Source File:    From deep-learning-note with MIT License 5 votes vote down vote up
def read_from_tfrecord(filenames):
    """Read one example from a queue of TFRecord files.

    :param filenames: list of TFRecord file names.
    :return: ``(label, shape, image)`` tensors; ``image`` is decoded as
        uint8 and reshaped to the stored ``shape`` (decoded as int32).
    """
    tfrecord_file_queue = tf.train.string_input_producer(filenames, name='queue')
    reader = tf.TFRecordReader()
    # NOTE(review): the right-hand side was lost in extraction; reading from
    # the queue created above is the only use of both locals.
    _, tfrecord_serialized = reader.read(tfrecord_file_queue)

    tfrecord_features = tf.parse_single_example(tfrecord_serialized, features={
        'label': tf.FixedLenFeature([],tf.int64),
        'shape': tf.FixedLenFeature([],tf.string),
        'image': tf.FixedLenFeature([],tf.string),
    }, name='features')

    image = tf.decode_raw(tfrecord_features['image'], tf.uint8)
    shape = tf.decode_raw(tfrecord_features['shape'], tf.int32)

    image = tf.reshape(image, shape)
    label = tfrecord_features['label']
    return label, shape, image
Example #24
Source File:    From deep-learning-note with MIT License 5 votes vote down vote up
def preprocess_image(image, label):
    """Scale pixels by 1/255, shape to 28x28x1 and wrap as an Estimator feature dict."""
    scaled = image / 255.
    shaped = tf.reshape(scaled, [28, 28, 1])
    return {FEATURES_KEY: shaped}, label
Example #25
Source File:    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def testSubsampleThreeByThree(self):
    """Subsampling a 3x3 ramp with factor 2 must keep the four corner values."""
    ramp = tf.to_float(tf.range(9))
    subsampled = resnet_utils.subsample(tf.reshape(ramp, [1, 3, 3, 1]), 2)
    corners = tf.constant([0, 2, 6, 8])
    expected = tf.reshape(corners, [1, 2, 2, 1])
    with self.test_session():
      self.assertAllClose(subsampled.eval(), expected.eval())
Example #26
Source File:    From spleeter with MIT License 5 votes vote down vote up
def set_tensor_shape(tensor, tensor_shape):
    """ Set shape for a tensor and return it.

    NOTE(review): the statement applying the shape was lost in extraction,
    leaving a function that ignored ``tensor_shape``. ``set_shape`` updates
    the tensor's static shape in place, so the same object is returned.
    The original summary described this as "not in place"; confirm the
    intended contract against upstream.

    :param tensor: Tensor to reshape.
    :param tensor_shape: Shape to apply to the tensor.
    :returns: The tensor, with its static shape set.
    """
    tensor.set_shape(tensor_shape)
    return tensor
Example #27
Source File:    From spleeter with MIT License 5 votes vote down vote up
def pad_and_partition(tensor, segment_len):
    """ Pad and partition a tensor into segment of len segment_len
    along the first dimension. The tensor is padded with 0 in order
    to ensure that the first dimension is a multiple of segment_len.

    Tensor must be of known fixed rank

    :Example:

    >>> tensor = [[1, 2], [3, 4], [5, 6]]
    >>> segment_len = 2
    >>> pad_and_partition(tensor, segment_len)
    [[[1, 2], [3, 4]], [[5, 6], [0, 0]]]

    :param tensor: tensor to partition along its first dimension.
    :param segment_len: length of each segment.
    :returns: tensor of shape ``[n_segments, segment_len] + tensor.shape[1:]``.
    """
    tensor_size = tf.math.floormod(tf.shape(tensor)[0], segment_len)
    # Second floormod maps "already a multiple" to 0 instead of segment_len.
    pad_size = tf.math.floormod(segment_len - tensor_size, segment_len)
    # NOTE(review): the tensor arguments and the enclosing concat/axis were
    # lost in extraction; they are rebuilt from the surviving argument lists
    # -- confirm against upstream.
    padded = tf.pad(
        tensor,
        [[0, pad_size]] + [[0, 0]] * (len(tensor.shape)-1))
    split = (tf.shape(padded)[0] + segment_len - 1) // segment_len
    return tf.reshape(
        padded,
        tf.concat(
            [[split, segment_len], tf.shape(padded)[1:]],
            axis=0))
Example #28
Source File:    From tensorflow-DeepFM with MIT License 5 votes vote down vote up
def predict(self, Xi, Xv):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :return: predicted probability of each sample as a 1-D array, or
            None when the dataset is empty
        """
        # dummy y
        dummy_y = [1] * len(Xi)
        batch_index = 0
        Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)
        # Collect per-batch outputs and concatenate once at the end, so each
        # batch appears exactly once in the result.
        batch_preds = []
        while len(Xi_batch) > 0:
            num_batch = len(y_batch)
            feed_dict = {self.feat_index: Xi_batch,
                         self.feat_value: Xv_batch,
                         self.label: y_batch,
                         self.dropout_keep_fm: [1.0] * len(self.dropout_fm),
                         self.dropout_keep_deep: [1.0] * len(self.dropout_deep),
                         self.train_phase: False}
            # NOTE(review): the session call was lost in extraction; running
            # self.out in self.sess is reconstructed -- confirm against upstream.
            batch_out = self.sess.run(self.out, feed_dict=feed_dict)
            batch_preds.append(np.reshape(batch_out, (num_batch,)))

            batch_index += 1
            Xi_batch, Xv_batch, y_batch = self.get_batch(Xi, Xv, dummy_y, self.batch_size, batch_index)

        if not batch_preds:
            return None
        return np.concatenate(batch_preds)
Example #29
Source File:    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def l2_batch_normalize(x, epsilon=1e-12, scope=None):
    """
    Helper function to normalize a batch of vectors.
    :param x: the input placeholder
    :param epsilon: stabilizes division
    :param scope: optional name scope; also used as the name of the
        returned reshape op
    :return: the batch of l2 normalized vector, with the shape of ``x``
    """
    with tf.name_scope(scope, "l2_batch_normalize") as scope:
        x_shape = tf.shape(x)
        x = tf.contrib.layers.flatten(x)
        # Rescale by the per-example max magnitude before squaring.
        x /= (epsilon + reduce_max(tf.abs(x), 1, keepdims=True))
        square_sum = reduce_sum(tf.square(x), 1, keepdims=True)
        x_inv_norm = tf.rsqrt(np.sqrt(epsilon) + square_sum)
        x_norm = tf.multiply(x, x_inv_norm)
        return tf.reshape(x_norm, x_shape, scope)
Example #30
Source File:    From Black-Box-Audio with MIT License 5 votes vote down vote up
def compute_mfcc(audio, **kwargs):
    """
    Compute the MFCC for a given audio waveform. This is
    identical to how DeepSpeech does it, but does it all in
    TensorFlow so that we can differentiate through it.

    :param audio: 2-D tensor with a statically known shape
        ``[batch_size, size]``.
    :param kwargs: not used.
    :return: tensor of shape ``[batch_size, n_frames, 26]`` whose first
        coefficient of every frame is the log frame energy.
    """

    batch_size, size = audio.get_shape().as_list()
    audio = tf.cast(audio, tf.float32)

    # 1. Pre-emphasizer, a high-pass filter; 1000 zeros are appended so the
    #    last windows below are full length.
    audio = tf.concat((audio[:, :1], audio[:, 1:] - 0.97*audio[:, :-1], np.zeros((batch_size,1000),dtype=np.float32)), 1)

    # 2. Windowing into overlapping frames of 400 samples, one every 160 samples
    windowed = tf.stack([audio[:, i:i+400] for i in range(0,size-320,160)],1)

    # 3. Take the FFT to convert to frequency space (power spectrum)
    ffted = tf.spectral.rfft(windowed, [512])
    ffted = 1.0 / 512 * tf.square(tf.abs(ffted))

    # 4. Compute the Mel windowing of the FFT
    energy = tf.reduce_sum(ffted,axis=2)+1e-30
    filters = np.load("filterbanks.npy").T
    feat = tf.matmul(ffted, np.array([filters]*batch_size,dtype=np.float32))+1e-30

    # 5. Log filterbank energies, then DCT; keep the first 26 coefficients
    feat = tf.log(feat)
    feat = tf.spectral.dct(feat, type=2, norm='ortho')[:,:,:26]

    # 6. Sinusoidal weighting of the coefficients
    _,nframes,ncoeff = feat.get_shape().as_list()
    n = np.arange(ncoeff)
    lift = 1 + (22/2.)*np.sin(np.pi*n/22)
    feat = lift*feat

    # 7. Replace the first coefficient of each frame with the log energy
    feat = tf.concat((tf.reshape(tf.log(energy),(-1,nframes,1)), feat[:, :, 1:]), axis=2)
    return feat