python source code of generators

from __future__ import division

import numpy as np
from scipy import linalg
from scipy.signal import fftconvolve
import datetime
import os
import matplotlib.pyplot as plt
import mkl_fft

from utils import polar2cart

import pyroomacoustics as pra

def unit_vec(doa):
    """
    This function takes a 2D (phi) or 3D (phi,theta) polar coordinates
    and returns a unit vector in cartesian coordinates.

    :param doa: (ndarray) An (D-1)-by-N array where D is the dimension and
                N the number of vectors.

    :return: (ndarray) A D-by-N array of unit vectors (each column is a vector)
    """

    if doa.ndim != 1 and doa.ndim != 2:
        raise ValueError("DoA array should be 1D or 2D.")

    doa = np.array(doa)

    if doa.ndim == 0 or doa.ndim == 1:
        return np.array([np.cos(doa), np.sin(doa)])

    elif doa.ndim == 2 and doa.shape[0] == 1:
        return np.array([np.cos(doa[0]), np.sin(doa[0])])

    elif doa.ndim == 2 and doa.shape[0] == 2:
        s = np.sin(doa[1])
        return np.array([s * np.cos(doa[0]), s * np.sin(doa[0]), np.cos(doa[1])])


def gen_far_field_ir(doa, R, fs):
    """
    This function generates the impulse responses for all microphones for
    K sources in the far field.

    :param doa: (nd-array) The sources direction of arrivals. This should
                be a (D-1)xK array where D is the dimension (2 or 3) and K
                is the number of sources
    :param R: the locations of the microphones
    :param fs: sampling frequency

    :return ir: (ndarray) A KxMxL array containing all the fractional delay
                filters between each source (axis 0) and microphone (axis 1)
                L is the length of the filter
    """

    # make sure these guys are nd-arrays
    doa = np.array(doa)

    if doa.ndim == 0:
        doa = np.array([[doa]])

    elif doa.ndim == 1:
        doa = np.array([doa])

    # the number of microphones
    M = R.shape[1]
    dim = R.shape[0]

    # the number of sources
    K = doa.shape[1]

    # convert the spherical coordinates to unit propagation vectors
    p_vec = -unit_vec(doa)

    # the delays are the inner product between unit vectors and mic locations
    # set zero delay at earliest microphone
    delays = np.dot(p_vec.T, R) / pra.constants.get('c')
    delays -= delays.min()

    # figure out the maximal length of the impulse responses
    L = pra.constants.get('frac_delay_length')
    t_max = delays.max()
    D = int(L + np.ceil(np.abs(t_max * fs)))

    # the impulse response filter bank
    fb = np.zeros((K, M, D))

    # create all the impulse responses
    for k in xrange(K):
        for m in xrange(M):
            t = delays[k, m]
            delay_s = t * fs
            delay_i = int(np.round(delay_s))
            delay_f = delay_s - delay_i
            fb[k, m, delay_i:delay_i + (L - 1) + 1] += pra.fractional_delay(delay_f)

    return fb


def gen_speech_at_mic_stft(phi_ks, source_signals, mic_array_coord, noise_power, fs, fft_size=1024):
    """
    generate microphone signals with short time Fourier transform
    :param phi_ks: azimuth of the acoustic sources
    :param source_signals: speech signals for each arrival angle, one per row
    :param mic_array_coord: x and y coordinates of the microphone array
    :param noise_power: the variance of the microphone noise signal
    :param fs: sampling frequency
    :param fft_size: number of FFT bins
    :return: y_hat_stft: received (complex) signal at microphones
             y_hat_stft_noiseless: the noiseless received (complex) signal at microphones
    """
    frame_shift_step = np.int(fft_size / 1.)  # half block overlap for adjacent frames
    K = source_signals.shape[0]  # number of point sources
    num_mic = mic_array_coord.shape[1]  # number of microphones

    # Generate the impulse responses for the array and source directions
    impulse_response = gen_far_field_ir(np.reshape(phi_ks, (1, -1), order='F'),
                                        mic_array_coord, fs)
    # Now generate all the microphone signals
    y = np.zeros((num_mic, source_signals.shape[1] + impulse_response.shape[2] - 1), dtype=np.float32)
    for src in xrange(K):
        for mic in xrange(num_mic):
            y[mic] += fftconvolve(impulse_response[src, mic], source_signals[src])

    # Now do the short time Fourier transform
    # The resulting signal is M x fft_size/2+1 x number of frames
    y_hat_stft_noiseless = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y]) / np.sqrt(fft_size)

    # Add noise to the signals
    y_noisy = y + np.sqrt(noise_power) * np.array(np.random.randn(*y.shape), dtype=np.float32)
    # compute sources stft
    source_stft = \
        np.array([pra.stft(s_loop, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for s_loop in source_signals]) / np.sqrt(fft_size)

    y_hat_stft = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y_noisy]) / np.sqrt(fft_size)

    return y_hat_stft, y_hat_stft_noiseless, source_stft


def gen_sig_at_mic_stft(phi_ks, alpha_ks, mic_array_coord, SNR, fs, fft_size=1024, Ns=256):
    """
    generate microphone signals with short time Fourier transform
    :param phi_ks: azimuth of the acoustic sources
    :param alpha_ks: power of the sources
    :param mic_array_coord: x and y coordinates of the microphone array
    :param SNR: signal to noise ratio at the microphone
    :param fs: sampling frequency
    :param fft_size: number of FFT bins
    :param Ns: number of time snapshots used to estimate covariance matrix
    :return: y_hat_stft: received (complex) signal at microphones
             y_hat_stft_noiseless: the noiseless received (complex) signal at microphones
    """
    frame_shift_step = np.int(fft_size / 1.)  # half block overlap for adjacent frames
    K = alpha_ks.shape[0]  # number of point sources
    num_mic = mic_array_coord.shape[1]  # number of microphones

    # Generate the impulse responses for the array and source directions
    impulse_response = gen_far_field_ir(np.reshape(phi_ks, (1, -1), order='F'),
                                        mic_array_coord, fs)

    # Now generate some noise
    # source_signal = np.random.randn(K, Ns * fft_size) * np.sqrt(alpha_ks[:, np.newaxis])
    source_signal = np.random.randn(K, fft_size + (Ns - 1) * frame_shift_step) * \
                    np.sqrt(np.reshape(alpha_ks, (-1, 1), order='F'))

    # Now generate all the microphone signals
    y = np.zeros((num_mic, source_signal.shape[1] + impulse_response.shape[2] - 1), dtype=np.float32)
    for src in xrange(K):
        for mic in xrange(num_mic):
            y[mic] += fftconvolve(impulse_response[src, mic], source_signal[src])

    # Now do the short time Fourier transform
    # The resulting signal is M x fft_size/2+1 x number of frames
    y_hat_stft_noiseless = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y]) / np.sqrt(fft_size)

    # compute noise variance based on SNR
    signal_energy = linalg.norm(y_hat_stft_noiseless.flatten()) ** 2
    noise_energy = signal_energy / 10 ** (SNR * 0.1)
    sigma2_noise = noise_energy / y_hat_stft_noiseless.size

    # Add noise to the signals
    y_noisy = y + np.sqrt(sigma2_noise) * np.array(np.random.randn(*y.shape), dtype=np.float32)

    y_hat_stft = \
        np.array([pra.stft(signal, fft_size, frame_shift_step, transform=mkl_fft.rfft).T
                  for signal in y_noisy]) / np.sqrt(fft_size)

    return y_hat_stft, y_hat_stft_noiseless


def gen_sig_at_mic(sigmak2_k, phi_k, pos_mic_x,
                   pos_mic_y, omega_band, sound_speed,
                   SNR, Ns=256):
    """
    generate complex base-band signal received at microphones
    :param sigmak2_k: the variance of the circulant complex Gaussian signal
                emitted by the K sources
    :param phi_k: source locations (azimuths)
    :param pos_mic_x: a vector that contains microphones' x coordinates
    :param pos_mic_y: a vector that contains microphones' y coordinates
    :param omega_band: mid-band (ANGULAR) frequency [radian/sec]
    :param sound_speed: speed of sound
    :param SNR: SNR for the received signal at microphones
    :param Ns: number of snapshots used to estimate the covariance matrix
    :return: y_mic: received (complex) signal at microphones
    """
    num_mic = pos_mic_x.size
    xk, yk = polar2cart(1, phi_k)  # source locations in cartesian coordinates
    # reshape to use broadcasting
    xk = np.reshape(xk, (1, -1), order='F')
    yk = np.reshape(yk, (1, -1), order='F')
    pos_mic_x = np.reshape(pos_mic_x, (-1, 1), order='F')
    pos_mic_y = np.reshape(pos_mic_y, (-1, 1), order='F')

    t = np.reshape(np.linspace(0, 10 * np.pi, num=Ns), (1, -1), order='F')
    K = sigmak2_k.size
    sigmak2_k = np.reshape(sigmak2_k, (-1, 1), order='F')

    # x_tilde_k size: K x length_of_t
    # circular complex Gaussian process
    x_tilde_k = np.sqrt(sigmak2_k / 2.) * (np.random.randn(K, Ns) + 1j *
                                           np.random.randn(K, Ns))
    y_mic = np.dot(np.exp(-1j * (xk * pos_mic_x + yk * pos_mic_y) / (sound_speed / omega_band)),
                   x_tilde_k * np.exp(1j * omega_band * t))
    signal_energy = linalg.norm(y_mic, 'fro') ** 2
    noise_energy = signal_energy / 10 ** (SNR * 0.1)
    sigma2_noise = noise_energy / (Ns * num_mic)
    noise = np.sqrt(sigma2_noise / 2.) * (np.random.randn(*y_mic.shape) + 1j *
                                          np.random.randn(*y_mic.shape))
    y_mic_noisy = y_mic + noise
    return y_mic_noisy, y_mic


def gen_visibility(alphak, phi_k, pos_mic_x, pos_mic_y):
    """
    generate visibility from the Dirac parameter and microphone array layout
    :param alphak: Diracs' amplitudes
    :param phi_k: azimuths
    :param pos_mic_x: a vector that contains microphones' x coordinates
    :param pos_mic_y: a vector that contains microphones' y coordinates
    :return:
    """
    xk, yk = polar2cart(1, phi_k)
    num_mic = pos_mic_x.size
    visi = np.zeros((num_mic, num_mic), dtype=complex)
    for q in xrange(num_mic):
        p_x_outer = pos_mic_x[q]
        p_y_outer = pos_mic_y[q]
        for qp in xrange(num_mic):
            p_x_qqp = p_x_outer - pos_mic_x[qp]  # a scalar
            p_y_qqp = p_y_outer - pos_mic_y[qp]  # a scalar
            visi[qp, q] = np.dot(np.exp(-1j * (xk * p_x_qqp + yk * p_y_qqp)), alphak)
    return visi


def gen_dirty_img(visi, pos_mic_x, pos_mic_y, omega_band, sound_speed, phi_plt):
    """
    Compute the dirty image associated with the given measurements. Here the Fourier transform
    that is not measured by the microphone array is taken as zero.
    :param visi: the measured visibilites
    :param pos_mic_x: a vector contains microphone array locations (x-coordinates)
    :param pos_mic_y: a vector contains microphone array locations (y-coordinates)
    :param omega_band: mid-band (ANGULAR) frequency [radian/sec]
    :param sound_speed: speed of sound
    :param phi_plt: plotting grid (azimuth on the circle) to show the dirty image
    :return:
    """
    img = np.zeros(phi_plt.size, dtype=complex)
    x_plt, y_plt = polar2cart(1, phi_plt)
    num_mic = pos_mic_x.size

    pos_mic_x_normalised = pos_mic_x / (sound_speed / omega_band)
    pos_mic_y_normalised = pos_mic_y / (sound_speed / omega_band)

    count_visi = 0
    for q in xrange(num_mic):
        p_x_outer = pos_mic_x_normalised[q]
        p_y_outer = pos_mic_y_normalised[q]
        for qp in xrange(num_mic):
            if not q == qp:
                p_x_qqp = p_x_outer - pos_mic_x_normalised[qp]  # a scalar
                p_y_qqp = p_y_outer - pos_mic_y_normalised[qp]  # a scalar
                # <= the negative sign converts DOA to propagation vector
                img += visi[count_visi] * \
                       np.exp(-1j * (p_x_qqp * x_plt + p_y_qqp * y_plt))
                count_visi += 1
    return img / (num_mic * (num_mic - 1))


def gen_mic_array_2d(radius_array, num_mic=3, save_layout=True,
                     divi=3, plt_layout=False, **kwargs):
    """
    generate microphone array layout randomly
    :param radius_array: microphones are contained within a cirle of this radius
    :param num_mic: number of microphones
    :param save_layout: whether to save the microphone array layout or not
    :return:
    """
    # pos_array_norm = np.linspace(0, radius_array, num=num_mic, dtype=float)
    # pos_array_angle = np.linspace(0, 5 * np.pi, num=num_mic, dtype=float)
    num_seg = np.ceil(num_mic / divi)
    # radius_stepsize = radius_array / num_seg

    # pos_array_norm = np.append(np.repeat((np.arange(num_seg) + 1) * radius_stepsize,
    #                                      divi)[:num_mic-1], 0)
    pos_array_norm = np.linspace(0, radius_array, num=num_mic, endpoint=False)

    # pos_array_angle = np.append(np.tile(np.pi * 2 * np.arange(divi) / divi, num_seg)[:num_mic-1], 0)
    pos_array_angle = np.reshape(np.tile(np.pi * 2 * np.arange(divi) / divi, num_seg),
                                 (divi, -1), order='F') + \
                      np.linspace(0, 2 * np.pi / divi,
                                  num=num_seg, endpoint=False)[np.newaxis, :]
    pos_array_angle = np.insert(pos_array_angle.flatten('F')[:num_mic - 1], 0, 0)

    pos_array_angle += np.random.rand() * np.pi / divi
    # pos_array_norm = np.random.rand(num_mic) * radius_array
    # pos_array_angle = 2 * np.pi * np.random.rand(num_mic)

    pos_mic_x = pos_array_norm * np.cos(pos_array_angle)
    pos_mic_y = pos_array_norm * np.sin(pos_array_angle)

    layout_time_stamp = datetime.datetime.now().strftime('%d-%m')
    if save_layout:
        directory = './data/'
        if not os.path.exists(directory):
            os.makedirs(directory)
        file_name = directory + 'mic_layout_' + layout_time_stamp + '.npz'
        np.savez(file_name, pos_mic_x=pos_mic_x, pos_mic_y=pos_mic_y,
                 layout_time_stamp=layout_time_stamp)

    if plt_layout:
        plt.figure(figsize=(2.5, 2.5), dpi=90)
        plt.plot(pos_mic_x, pos_mic_y, 'x')
        plt.axis('image')
        plt.xlim([-radius_array, radius_array])
        plt.ylim([-radius_array, radius_array])
        plt.title('microphone array layout', fontsize=11)

        if 'save_fig' in kwargs:
            save_fig = kwargs['save_fig']
        else:
            save_fig = False
        if 'fig_dir' in kwargs and save_fig:
            fig_dir = kwargs['fig_dir']
        else:
            fig_dir = './result/'
        if save_fig:
            if not os.path.exists(fig_dir):
                os.makedirs(fig_dir)
            fig_name = (fig_dir + 'polar_numMic_{0}_layout' +
                        layout_time_stamp + '.pdf').format(repr(num_mic))
            plt.savefig(fig_name, format='pdf', dpi=300, transparent=True)

            # plt.show()
    return pos_mic_x, pos_mic_y, layout_time_stamp


def gen_diracs_param(K, num_band=1, positive_amp=True, log_normal_amp=False,
                     semicircle=True, save_param=True):
    """
    randomly generate Diracs' parameters
    :param K: number of Diracs
    :param positive_amp: whether Diracs have positive amplitudes or not
    :param log_normal_amp: whether the Diracs amplitudes follow log-normal distribution.
    :param semicircle: whether the Diracs are located on half of the circle or not.
    :param save_param: whether to save the Diracs' parameter or not.
    :return:
    """
    # amplitudes
    if log_normal_amp:
        positive_amp = True
    if not positive_amp:
        alpha_ks = np.column_stack([np.sign(np.random.randn(K)) *
                                    (0.7 + 0.6 * (np.random.rand(K) - 0.5) / 1.)
                                    for band_count in xrange(num_band)])
    elif log_normal_amp:
        alpha_ks = np.column_stack([np.random.lognormal(mean=np.log(2),
                                                        sigma=0.7, size=K)
                                    for band_count in xrange(num_band)])
    else:
        alpha_ks = np.column_stack([0.7 + 0.6 * (np.random.rand(K) - 0.5) / 1.
                                    for band_count in xrange(num_band)])

    # location on the circle (S^1)
    if semicircle:
        factor = 1
    else:
        factor = 2

    min_sep = 1. / 30
    # exp_rnd = np.random.exponential(1. / (K - 1), K - 1)
    # phi_ks = np.cumsum(min_sep + (1 - (K - 1) * min_sep) *
    #                    (1. - 0.1 * np.random.rand(1, 1)) /
    #                    np.sum(exp_rnd) * exp_rnd)
    # phi_ks = factor * np.pi * np.append(phi_ks, np.random.rand() * phi_ks[0] / 2.)
    exp_rnd = np.random.exponential(1. / K, K)
    phi_ks = factor * np.pi * np.cumsum(min_sep + (1 - K * min_sep) *
                                        (1. - 0.1 * np.random.rand(1, 1)) /
                                        np.sum(exp_rnd) * exp_rnd)

    time_stamp = datetime.datetime.now().strftime('%d-%m_%H_%M')
    if save_param:
        if not os.path.exists('./data/'):
            os.makedirs('./data/')
        file_name = './data/polar_Dirac_' + time_stamp + '.npz'
        np.savez(file_name, alpha_ks=alpha_ks,
                 phi_ks=phi_ks, K=K, time_stamp=time_stamp)
    return alpha_ks, phi_ks, time_stamp

# # if uncommented, use: from tools_fri_doa_plane import extract_off_diag
# def add_noise(visi_noiseless, var_noise, num_mic, Ns=256):
#     """
#     add noise to the noiselss visibility
#     :param visi_noiseless: noiseless visibilities
#     :param var_noise: variance of noise
#     :param num_mic: number of microphones
#     :param Ns: number of samples used to estimate the covariance matrix
#     :return:
#     """
#     sigma_mtx = visi_noiseless + var_noise * np.eye(*visi_noiseless.shape)
#     wischart_mtx = np.kron(sigma_mtx.conj(), sigma_mtx) / Ns
#     # the noise vairance matrix is given by the Cholesky decomposition
#     noise_conv_mtx_sqrt = np.linalg.cholesky(wischart_mtx)
#     visi_noiseless_vec = np.reshape(visi_noiseless, (-1, 1), order='F')
#     noise = np.dot(noise_conv_mtx_sqrt,
#                    np.random.randn(*visi_noiseless_vec.shape) +
#                    1j * np.random.randn(*visi_noiseless_vec.shape)) / np.sqrt(2)
#     # a matrix form
#     visi_noisy = np.reshape(visi_noiseless_vec + noise, visi_noiseless.shape, order='F')
#     # extract the off-diagonal entries
#     visi_noisy = extract_off_diag(visi_noisy)
#     visi_noiseless_off_diag = extract_off_diag(visi_noiseless)
#     # calculate the equivalent SNR
#     noise = visi_noisy - visi_noiseless_off_diag
#     P = 20 * np.log10(linalg.norm(visi_noiseless_off_diag) / linalg.norm(noise))
#     return visi_noisy, P, noise, visi_noiseless_off_diag