import tensorflow as tf
import numpy as np


def mu_law_encode(audio, quantization_channels=256):
    '''Quantizes waveform amplitudes.
    Adapted from https://github.com/ibab/tensorflow-wavenet
    '''
    with tf.name_scope('encode'):
        mu = quantization_channels - 1
        safe_audio_abs = tf.minimum(tf.abs(audio), 1.0)
        magnitude = tf.log(1. + mu * safe_audio_abs) / tf.log(1. + mu)
        signal = tf.sign(audio) * magnitude
        return tf.cast((signal + 1) / 2 * mu + 0.5, tf.uint8)


# # NumPy version
# def mu_law_encode(audio, quantization_channels=256):
#     '''Quantizes waveform amplitudes.
#     Adapted from https://github.com/ibab/tensorflow-wavenet
#     '''
#     mu = quantization_channels - 1
#     safe_audio_abs = np.minimum(np.abs(audio), 1.0)
#     magnitude = np.log1p(mu * safe_audio_abs) / np.log1p(mu)
#     signal = np.sign(audio) * magnitude
#     return ((signal + 1) / 2 * mu + 0.5).astype(np.uint8)


def mu_law_decode(output, quantization_channels=256):
    '''Recovers waveform from quantized values.
    Adapted from https://github.com/ibab/tensorflow-wavenet
    '''
    with tf.name_scope('decode'):
        mu = quantization_channels - 1
        signal = 2 * (tf.to_float(output) / mu) - 1
        magnitude = (1 / mu) * ((1 + mu) ** tf.abs(signal) - 1)
        return tf.sign(signal) * magnitude


def gray2jet(x):
    '''Maps a grayscale tensor in [0, 1] to the jet colormap.
    NHWC (channel-last) format.
    '''
    def line(x, xa, xb, ya, yb):
        '''A line determined by the two points (xa, ya) and (xb, yb).'''
        return ya + (x - xa) * (yb - ya) / (xb - xa)

    def clip_to_boundary(line1, line2, minval, maxval):
        with tf.name_scope('ClipToBoundary'):
            x = tf.minimum(line1, line2)
            x = tf.minimum(x, maxval)
            x = tf.maximum(x, minval)
            return x

    with tf.name_scope('Gray2Jet'):
        r = clip_to_boundary(
            line(x, .3515, .66, 0., 1.),
            line(x, .8867, 1., 1., .5),
            minval=0.,
            maxval=1.,
        )
        g = clip_to_boundary(
            line(x, .125, .375, 0., 1.),
            line(x, .64, .91, 1., 0.),
            minval=0.,
            maxval=1.,
        )
        b = clip_to_boundary(
            line(x, .0, .1132, 0.5, 1.0),
            line(x, .34, .648, 1., 0.),
            minval=0.,
            maxval=1.,
        )
        return tf.concat([r, g, b], axis=-1)


def spectrogram(x, frame_length, nfft=1024):
    '''Spectrogram of non-overlapping windows.'''
    with tf.name_scope('Spectrogram'):
        shape = tf.shape(x)
        b = shape[0]
        D = frame_length
        t = shape[1] // D
        x = tf.reshape(x, [b, t, D])

        window = tf.contrib.signal.hann_window(frame_length)
        window = tf.expand_dims(window, 0)
        window = tf.expand_dims(window, 0)  # [1, 1, L]
        x = x * window

        pad = tf.zeros([b, t, nfft - D])
        x = tf.concat([x, pad], -1)
        x = tf.cast(x, tf.complex64)
        X = tf.fft(x)  # TF's API doesn't do padding automatically yet

        X = tf.log(tf.abs(X) + 1e-2)
        X = X[:, :, :nfft // 2 + 1]
        X = tf.transpose(X, [0, 2, 1])
        X = tf.reverse(X, [1])
        X = tf.expand_dims(X, -1)
        X = (X - tf.reduce_min(X)) / (tf.reduce_max(X) - tf.reduce_min(X))
        X = gray2jet(X)
        tf.summary.image('spectrogram', X)
        return X


def visualize_wav_prob(x, name):
    with tf.name_scope('VisualizeWavProbability'):
        x = tf.nn.softmax(x)  # x: [b, T, c]
        x = tf.transpose(x, [0, 2, 1])
        x = tf.expand_dims(x, -1)
        tf.summary.image(name, x)


def visualize_wav(x, n_symbol, name):
    with tf.name_scope('VisualizeWav'):
        x = tf.one_hot(x, n_symbol)  # x: [b, T] -> [b, T, n_symbol]
        x = tf.transpose(x, [0, 2, 1])
        x = tf.expand_dims(x, -1)
        tf.summary.image(name, x)


def visualize_latent_distr(z_posterior, name):
    '''z_posterior: [b, T, c]
    NOTE: feeding a [b, h, c] tensor directly into tf.summary.image
    results in an image TensorBoard cannot display.
    '''
    with tf.name_scope('VisualizeLatentDistr'):
        z_posterior = tf.transpose(z_posterior, [0, 2, 1])
        z_posterior = tf.expand_dims(z_posterior, -1)
        tf.summary.image(name, z_posterior)
        tf.summary.histogram(name, z_posterior)
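

# Minimal usage sketch (an assumption, not part of the original module): runs the
# mu-law round trip and the spectrogram summary under a TF1-style session. The
# placeholder name `wav_batch`, the frame length, and the toy sine waveform are
# illustrative only.
if __name__ == '__main__':
    wav_batch = tf.placeholder(tf.float32, [None, None], name='wav_batch')  # [b, T], amplitudes in [-1, 1]
    encoded = mu_law_encode(wav_batch)   # uint8 class indices in [0, 255]
    decoded = mu_law_decode(encoded)     # approximate reconstruction in [-1, 1]
    spec = spectrogram(wav_batch, frame_length=256, nfft=1024)
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        # A 4096-sample sine wave gives 16 non-overlapping frames of 256 samples.
        dummy = np.sin(np.linspace(0., 100. * np.pi, 4096)).astype(np.float32)[None, :]
        rec, _ = sess.run([decoded, summary_op], feed_dict={wav_batch: dummy})
        print('max mu-law round-trip error:', np.abs(rec - dummy).max())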