import sys

import numpy as np

import triplet_loss
from batcher import KerasFormatConverter, TripletBatcherSelectHardNegatives, TripletBatcher
from constants import NUM_FBANKS, NUM_FRAMES, CHECKPOINTS_TRIPLET_DIR, CHECKPOINTS_SOFTMAX_DIR, BATCH_SIZE
from conv_models import DeepSpeakerModel
from triplet_loss import deep_speaker_loss
from utils import load_best_checkpoint


def predict(x):
    # Fake embedding function used to exercise the batchers: collapse each sample
    # to its mean value and tile it to a 512-dim vector.
    y = np.tile(np.expand_dims(np.mean(x, axis=(1, 2, 3)), axis=1), (1, 512))
    return y
    # norm_y = np.linalg.norm(y, axis=1, ord=2, keepdims=True)
    # return y / (norm_y + 1e-12)


def main2():
    # Build a synthetic dataset where each sample is filled with its speaker id,
    # then compare the triplet loss of random batches (TripletBatcher) against
    # batches with selected hard negatives (TripletBatcherSelectHardNegatives).
    num_utterances_per_speaker = 50
    num_speakers = 100
    num_samples = num_speakers * num_utterances_per_speaker
    kx_train = np.zeros(shape=(num_samples, 32, 64, 1))
    ky_train = np.zeros(shape=(num_samples, num_speakers))
    for i in range(num_samples):
        speaker_id = i % num_speakers
        ky_train[i, speaker_id] = 1
        kx_train[i] = speaker_id
    kx_test = np.array(kx_train)
    ky_test = np.array(ky_train)
    tpshn = TripletBatcherSelectHardNegatives(kx_train, ky_train, kx_test, ky_test, None)
    tp = TripletBatcher(kx_train, ky_train, kx_test, ky_test)
    avg = []
    avg2 = []
    while True:
        bx, by = tp.get_batch(BATCH_SIZE, is_test=False)
        avg.append(float(triplet_loss.deep_speaker_loss(predict(bx), predict(bx))))

        bx, by = tpshn.get_batch(BATCH_SIZE, is_test=False, predict=predict)
        avg2.append(float(triplet_loss.deep_speaker_loss(predict(bx), predict(bx))))
        print(np.mean(avg), np.mean(avg2))


def main():
    # Use the hard-negative batcher only if a command-line argument was given.
    select = True
    try:
        sys.argv[1]
    except IndexError:
        select = False
    print('select', select)
    working_dir = '/media/philippe/8TB/deep-speaker'
    # By construction these losses should be much higher than the normal losses.
    # We select batches this way.
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    print('Testing with the triplet losses.')
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
    triplet_checkpoint = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
    pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
    if triplet_checkpoint is not None:
        print(f'Loading triplet checkpoint: {triplet_checkpoint}.')
        dsm.m.load_weights(triplet_checkpoint)
    elif pre_training_checkpoint is not None:
        print(f'Loading pre-training checkpoint: {pre_training_checkpoint}.')
        # If `by_name` is True, weights are loaded into layers only if they share the
        # same name. This is useful for fine-tuning or transfer-learning models where
        # some of the layers have changed.
        dsm.m.load_weights(pre_training_checkpoint, by_name=True)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)

    kc = KerasFormatConverter(working_dir)
    if select:
        print('TripletBatcherSelectHardNegatives()')
        batcher = TripletBatcherSelectHardNegatives(kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test, dsm)
    else:
        print('TripletBatcher()')
        batcher = TripletBatcher(kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test)
    batch_size = BATCH_SIZE
    losses = []
    while True:
        _bx, _by = batcher.get_batch(batch_size, is_test=False)
        losses.append(dsm.m.evaluate(_bx, _by, verbose=0, batch_size=BATCH_SIZE))
        print(np.mean(losses))


if __name__ == '__main__':
    main2()