python source code of helpers

NeMo-master
- examples
  - tts
    - tts_infer.py
    - tacotron2.py
    - tacotron2_v0p9.py
    - waveglow.py
    - notebooks
      - 1_Tacotron_inference.ipynb
    - fastspeech.py
    - talknet_durs.py
    - configs
      - waveglow.yaml
      - tacotron2.yaml
      - talknet-mels-lj.yaml
      - fastspeech.yaml
      - talknet-durs-lj.yaml
      - talknet-mels-lt.yaml
      - tacotron2_mandarin.yaml
    - waveglow_v0p9.py
    - fastspeech_durations.py
    - talknet_mels.py
  - nlp
    - dialogue_state_tracking
      - dialogue_state_tracking_trade.py
      - rule_based_policy_multiwoz.py
      - dialogue_state_tracking_sgd.py
      - data
        sgd
        convert_multiwoz_to_sgd.py
        dialogue_augmentation.py
        multiwoz
        correct_categorical_state_values.tsv
        __init__.py
        process_multiwoz.py
        multiwoz_mapping.pair
        schema.json
    - text_classification
      - sentiment_analysis_with_bert.ipynb
      - text_classification_with_bert.py
      - data
        import_datasets.py
    - glue_benchmark
      - glue_benchmark_with_bert.py
    - lasertagger
      - run_summarization.sh
      - lasertagger_main.py
      - phrase_vocabulary_optimization.py
      - lasertagger_preprocessor.py
      - official_lasertagger
        tagging_converter.py
        sari_hook.py
        tagging.py
        score_lib.py
        __init__.py
        utils.py
        bert_example.py
      - README.md
    - biobert_notebooks
      - biobert_re.ipynb
      - biobert_qa.ipynb
      - biobert_ner.ipynb
    - neural_machine_translation
      - machine_translation_tutorial.py
    - intent_detection_slot_tagging
      - data
        assistant_utils.py
        import_datasets.py
        mturk_utils.py
        dialogflow_utils.py
      - joint_intent_slot_infer_b1.py
      - joint_intent_slot_with_bert.py
      - joint_intent_slot_infer.py
    - question_answering
      - get_bioasq.py
      - question_answering_squad.py
      - get_squad.py
    - token_classification
      - get_medical_data.py
      - token_classification.py
      - punctuation_capitalization_infer.py
      - import_from_iob_format.py
      - PunctuationWithBERT.ipynb
      - NERWithBERT.ipynb
      - punctuation_capitalization.py
      - token_classification_infer.py
      - get_tatoeba_data.py
    - language_modeling
      - create_vocab.py
      - BERTPretrainingTutorial.ipynb
      - language_modeling_transformer.py
      - process_wiki_zh.py
      - get_wkt2.sh
      - bert_pretraining.py
    - asr_postprocessor
      - get_decoder_params_from_bert.py
      - asr_postprocessor.py
  - asr
    - an4-lm.3gram.binary
    - speech2text.py
    - QuartzNetModel.ipynb
    - jasper_an4.py
    - speech2text_infer.py
    - notebooks
      - 4_Online_Data_Augmentation.ipynb
      - images
      - 5_Online_Speech_Commands_Microphone_Demo.ipynb
      - 6_VAD_using_NeMo.ipynb
      - 1_ASR_tutorial_using_NeMo.ipynb
      - 7_VAD_Offline_Online_Microphone_Demo.ipynb
      - 3_Speech_Commands_using_NeMo.ipynb
      - configs
        jasper_an4.yaml
      - README.md
      - 2_Online_ASR_Microphone_Demo.ipynb
    - quartznet_vad.py
    - configs
      - quartznet_speech_commands_3x1_v1.yaml
      - quartznet_vad_3x1.yaml
      - quartznet15x5.yaml
      - jasper10x5.yaml
      - qn-multien.yaml
      - quartznet15x5_8kHz.yaml
      - qn-libritts.yaml
      - jasper_an4.yaml
      - quartznet15x5-zh.yaml
      - quartznet_speech_commands_3x1_v2.yaml
    - experimental
      - garnet_rnnlm.py
      - configs
        garnet_an4.yaml
        quartznet5x3.yaml
        quartznet15x5_in.yaml
        garnet.yaml
        quartznet5x5.yaml
        garnet_rnn_lm.yaml
        quartznet_an4.yaml
        quartznet15x5_groupedconv.yaml
        jasper10x4.yaml
        quartznet10x5.yaml
      - garnet.py
      - contextnet.py
    - quartznet_speech_commands.py
    - other
      - quartznet.py
      - quartznet15x5-old.yaml
      - jasper.py
      - jasper_eval.py
      - jasper10x5-old.yaml
  - start_here
    - module_configuration.py
    - module_custom_configuration.py
    - simplest_example.py
    - chatbot_example.py
    - README.md
  - applications
    - asr_service
      - README.rst
      - recognize.html
      - requirements.txt
      - app
        routes.py
        __init__.py
      - asr_service.py
  - speaker_recognition
    - hi-mia_eval.py
    - train_plda.sh
    - spkr_get_emb.py
    - notebooks
      - Speaker_Recognition_hi-mia.ipynb
      - Speaker_Recognition_an4.ipynb
    - speaker_reco.py
    - configs
      - quartznet_spkr_3x1x512_xvector.yaml
      - quartznet_spkr_3x2x512_xvector.yaml
      - quartznet_spkr_5x1x512_xvector.yaml
    - kaldi_plda.py
  - neural_graphs
    - neural_graph_custom_training.ipynb
    - img
    - neural_graph_advanced.ipynb
    - neural_graph_basic.ipynb
  - image
    - transfer_learning.py
    - resnet50.py
    - gan.py
    - simple_gan.ipynb
    - README.md
- LICENSE
- .readthedocs.yml
- Dockerfile.jetson
- CONTRIBUTING.md
- reinstall.sh
- CHANGELOG.md
- README.rst
- setup.py
- setup.cfg
- tests
  - integration
    - test_vad_gradient_step_and_eval.py
    - test_speaker_recognition_gradient_step.py
    - test_integration_multidataset.py
    - test_speechcommands_gradient_step_and_eval.py
    - core
      - test_integration_neural_graph.py
    - test_tts_gradient_step.py
    - test_asr_gradient_step_and_eval.py
  - data
    - quartznet_speech_recognition.yaml
    - quartznet_spkr_test.yaml
    - jasper_smaller.yaml
    - nmt_en_zh_sample_data
      - train.en
      - zh_vocab.txt
      - train.zh
      - en_yttm.model
      - valid.en
      - valid.zh
      - README.md
    - en_de
      - bpe8k_yttm.model
      - valid.en
      - valid.de
    - sc_zh_sample_data
      - data
        健康
        00001.txt
        00010.txt
        00009.txt
        00003.txt
        00008.txt
        00005.txt
        00007.txt
        00006.txt
        00002.txt
        00004.txt
        艺术
        00001.txt
        00010.txt
        00009.txt
        00003.txt
        00008.txt
        00005.txt
        00007.txt
        00006.txt
        00002.txt
        00004.txt
        旅游
        00001.txt
        00010.txt
        00009.txt
        00003.txt
        00008.txt
        00005.txt
        00007.txt
        00006.txt
        00002.txt
        00004.txt
      - README.md
    - quartznet_test.yaml
    - ner_zh_sample_data
      - labels_train.txt
      - labels_dev.txt
      - README.md
      - text_dev.txt
      - text_train.txt
    - pred_real
      - valid.real
      - valid.pred
    - quartznet_vad.yaml
    - dialog_sample.txt
    - contextnet_32.yaml
  - configs
    - test_deploy_export.yaml
  - __init__.py
  - system
    - test_pytorch_trainers.py
    - test_infer.py
  - docs
    - test_documentation.py
  - unit
    - test_unit_asr.py
    - utils
      - test_deprecated.py
      - test_app_state.py
      - test_object_registry.py
    - test_spc_tokenizer.py
    - test_tutorials_cornell_data.py
    - test_unit_multidataset.py
    - test_unit_speech_commands.py
    - __init__.py
    - core
      - tensorrt_loaders.py
      - test_policies.py
      - test_nemo_callbacks.py
      - test_deploy_export.py
      - neural_graph
        test_neural_graph_serialization.py
        test_neural_graph_nesting.py
        test_neural_graph_import_export.py
        test_neural_graph_binding.py
        test_neural_graphs.py
      - test_weight_share.py
      - test_model.py
      - test_nm_tensor.py
      - test_neural_types.py
      - __init__.py
      - neural_module
        test_module_configuration.py
        test_module_configuration_import.py
        test_module_decorators.py
        test_module_initialization.py
        test_module_configuration_export.py
      - test_actions_api.py
      - tensorrt_format.py
      - tensorrt_runner.py
    - test_torch_backend.py
    - test_huggingface.py
  - conftest.py
- scripts
  - process_hub5_data.py
  - get_hi-mia_data.py
  - convert_to_tarred_audio_dataset.py
  - get_databaker_data.py
  - process_fisher_data.py
  - process_aishell2_data.py
  - build_lm_text.py
  - process_vad_data.py
  - fisher_audio_to_wav.py
  - export_jasper_to_onnx.py
  - install_decoders.sh
  - get_aishell_data.py
  - install_decoders_MacOS.sh
  - build_6-gram_OpenSLR_lm.sh
  - process_an4_data.py
  - get_librispeech_data.py
  - docker
    - onnx-trt.patch
  - get_ljspeech_data.py
  - process_speech_commands_data.py
  - export_jasper_onnx_to_trt.py
  - convert_wav_to_g711wav.py
  - quartznet_model_for_jarvis.py
  - get_timit_data.py
  - freesound_download_resample
    - freesound_requirements.txt
    - freesound_download.py
    - download_resample_freesound.sh
    - freesound_resample.py
  - scp_to_manifest.py
  - export_bert_to_trt.py
- nemo
  - utils
    - misc.py
    - decorators
      - port_docs.py
      - __init__.py
      - deprecated.py
    - metaclasses.py
    - configuration_error.py
    - app_state.py
    - neural_graph
      - neural_graph_manager.py
      - graph_outputs.py
      - connection.py
      - graph_inputs.py
      - object_registry.py
    - env_var_parsing.py
    - formatters
      - __init__.py
      - utils.py
      - colors.py
      - base.py
    - __init__.py
    - exp_logging.py
    - configuration_parsing.py
    - nemo_logging.py
    - lr_policies.py
    - argparse.py
    - helpers.py
    - nmtensor_registry.py
  - package_info.py
  - backends
    - torch_backend.py
    - pytorch
      - actions.py
      - module_wrapper.py
      - optimizers.py
      - common
        search.py
        metrics.py
        losses.py
        zero_data.py
        other.py
        __init__.py
        rnn.py
        multi_data.py
        parts.py
      - tutorials
        chatbot
        data.py
        modules.py
        __init__.py
        __init__.py
        toys.py
      - __init__.py
      - nm.py
      - torchvision
        data
        image_folder.py
        __init__.py
        __init__.py
        helpers.py
    - load_backend.py
    - __init__.py
  - constants.py
  - collections
    - tts
      - data_layers.py
      - talknet_modules.py
      - tacotron2_modules.py
      - fastspeech_modules.py
      - __init__.py
      - README.md
      - parts
        talknet.py
        manifest.py
        fastspeech_transformer.py
        tacotron2.py
        waveglow.py
        fastspeech.py
        datasets.py
        layers.py
        __init__.py
        helpers.py
      - waveglow_modules.py
    - nlp
      - callbacks
        token_classification_callback.py
        lasertagger_callback.py
        machine_translation_callback.py
        text_classification_callback.py
        joint_intent_slot_callback.py
        lm_transformer_callback.py
        state_tracking_trade_callback.py
        qa_squad_callback.py
        __init__.py
        glue_benchmark_callback.py
        sgd_callback.py
        lm_bert_callback.py
        punctuation_capitalization_callback.py
      - utils
        evaluation_utils.py
        functional_utils.py
        __init__.py
        transformer_utils.py
        data_utils.py
        callback_utils.py
      - nm
        losses
        spanning_loss.py
        sgd_loss.py
        smoothed_cross_entropy_loss.py
        __init__.py
        masked_xentropy_loss.py
        non_trainables
        dialogue_state_tracking
        user_utterance_encoder.py
        rule_based_dpm_multiwoz.py
        system_utterance_history_update.py
        trade_state_update_nm.py
        manual_system_template_nlg.json
        template_nlg_multiwoz.py
        __init__.py
        trainables
        joint_intent_slot
        __init__.py
        joint_intent_slot_classifier_nm.py
        dialogue_state_tracking
        sgd
        sgd_encoder_nm.py
        __init__.py
        sgd_decoder_nm.py
        trade_generator_nm.py
        __init__.py
        punctuation_capitalization
        punctuation_capitalization_classifier_nm.py
        __init__.py
        common
        sequence_classification_nm.py
        encoder_rnn.py
        transformer
        transformer_generators.py
        transformer_nm.py
        transformer_modules.py
        __init__.py
        transformer_encoders.py
        transformer_decoders.py
        sequence_regression_nm.py
        __init__.py
        token_classification_nm.py
        huggingface
        huggingface_utils.py
        albert_nm.py
        bert_nm.py
        __init__.py
        roberta_nm.py
        megatron
        megatron_utils.py
        megatron_bert_nm.py
        __init__.py
        common_utils.py
        __init__.py
        __init__.py
        data_layers
        bert_inference_datalayer.py
        text_datalayer.py
        state_tracking_sgd_datalayer.py
        state_tracking_trade_datalayer.py
        qa_squad_datalayer.py
        glue_benchmark_datalayer.py
        lm_bert_datalayer.py
        joint_intent_slot_datalayer.py
        punctuation_capitalization_datalayer.py
        __init__.py
        text_classification_datalayer.py
        machine_translation_datalayer.py
        token_classification_datalayer.py
        lasertagger_datalayer.py
        lm_transformer_datalayer.py
      - data
        datasets
        machine_translation_dataset.py
        lm_transformer_dataset.py
        token_classification_dataset.py
        lasertagger_dataset.py
        text_classification
        text_classification_descriptor.py
        __init__.py
        text_classification_dataset.py
        punctuation_capitalization_dataset.py
        glue_benchmark_dataset
        glue_benchmark_dataset.py
        data_processors.py
        __init__.py
        sgd_dataset
        evaluate.py
        metrics.py
        schema.py
        prediction_utils.py
        schema_embedding_dataset.py
        input_example.py
        schema_processor.py
        sgd_dataset.py
        data_processor.py
        datasets_utils
        data_preprocessing.py
        __init__.py
        datasets_processing.py
        multiwoz_dataset
        state.py
        multiwoz_slot_trans.py
        dbquery.py
        __init__.py
        multiwoz_dataset.py
        __init__.py
        lm_bert_dataset.py
        joint_intent_slot_dataset
        joint_intent_slot_descriptor.py
        inference_utils.py
        __init__.py
        joint_intent_slot_dataset.py
        qa_squad_dataset
        qa_squad_processing.py
        qa_squad_dataset.py
        tokenizers
        tokenizer_utils.py
        bert_tokenizer.py
        __init__.py
        sentencepiece_tokenizer.py
        word_tokenizer.py
        fairseq_tokenizer.py
        gpt2_tokenizer.py
        char_tokenizer.py
        tokenizer_spec.py
        youtokentome_tokenizer.py
        __init__.py
      - __init__.py
      - README.md
      - metrics
        sacrebleu.py
        __init__.py
        squad_metrics.py
        bleu.py
      - neural_types.py
    - asr
      - beam_search_decoder.py
      - jasper.py
      - metrics.py
      - models
        asrconvctcmodel.py
        __init__.py
      - losses.py
      - data_layer.py
      - audio_preprocessing.py
      - __init__.py
      - README.md
      - greedy_ctc_decoder.py
      - parts
        features.py
        perturb.py
        numba_utils.py
        jasper.py
        segment.py
        manifest.py
        spectr_augment.py
        collections.py
        cleaners.py
        __init__.py
        dataset.py
        parsers.py
      - las
        misc.py
        __init__.py
        helpers.py
      - contextnet.py
      - helpers.py
    - simple_gan
      - gan.py
      - __init__.py
      - README.md
    - __init__.py
    - cv
      - examples
        cifar10_convnet_ffn_image_classification.py
        cifar10_resnet50_image_classification.py
        mnist_lenet5_image_classification.py
        mnist_ffn_image_classification.py
        mnist_convnet_ffn_image_classification.py
        cifar100_vgg16_ffn_image_classification.py
      - __init__.py
      - README.md
      - modules
        losses
        __init__.py
        nll_loss.py
        non_trainables
        reshape_tensor.py
        non_linearity.py
        __init__.py
        trainables
        image_encoder.py
        lenet5.py
        convnet_encoder.py
        __init__.py
        feed_forward_network.py
        __init__.py
        data_layers
        cifar100_datalayer.py
        cifar10_datalayer.py
        mnist_datalayer.py
        stl10_datalayer.py
        __init__.py
  - __init__.py
  - README.md
  - core
    - actions.py
    - deprecated_callbacks.py
    - nemo_model.py
    - module_decorators.py
    - neural_types
      - elements.py
      - axes.py
      - __init__.py
      - comparison.py
      - neural_type.py
    - neural_modules.py
    - __init__.py
    - callbacks.py
    - neural_graph.py
    - neural_factory.py
    - neural_interface.py
- Dockerfile
- .gitignore
- Jenkinsfile
- docs
  - .nojekyll
  - sources
    - update_docs.sh
    - Makefile
    - source
      - tts
        datasets.rst
        intro.rst
        tutorial.rst
        tacotron2.rst
        fastspeech.rst
        waveglow.rst
        models.rst
      - nlp
        question_answering.rst
        megatron_finetuning.rst
        ner.rst
        text_classification.rst
        glue.rst
        nlp_all_refs.bib
        intro.rst
        punctuation.rst
        bert_pretraining.rst
        dialogue_state_tracking_trade.rst
        asr-improvement.rst
        joint_intent_slot_filling.rst
        neural_machine_translation.rst
        transformer_language_model.rst
        dialogue_state_tracking_sgd.rst
      - asr
        datasets.rst
        jasper.rst
        intro.rst
        quartznet.rst
        8kHz_models.rst
        tutorial.rst
        asr_all.bib
        models.rst
        installation.rst
      - voice_activity_detection
        datasets.rst
        intro.rst
        installation_link.rst
        matchboxnet.rst
        vad_all.bib
        tutorial.rst
        models.rst
      - training.rst
      - api-docs
        modules.rst
        nemo.rst
      - tutorials
        neuraltypes.rst
        callbacks.rst
        intro.rst
        neural_graphs.rst
        module_custom_configuration.rst
        examples.rst
        complex_training.rst
        program_model.rst
        old_callbacks.rst
        weightsharing.rst
        module_configuration.rst
        custommodules.rst
      - collections
        nemo_cv.rst
        core.rst
        nemo_tts.rst
        modules.rst
        nemo_nlp.rst
        nemo_asr.rst
      - chinese
        intro.rst
      - speech_command
        datasets.rst
        intro.rst
        quartznet.rst
        installation_link.rst
        tutorial.rst
        speech_recognition_all.bib
        models.rst
      - speaker_recognition
        datasets.rst
        intro.rst
        quartznet.rst
        installation_link.rst
        tutorial.rst
        models.rst
        speaker.bib
      - index.rst
      - conf.py
    - update_docs_docker.sh
  - docs_zh
    - sources
      - update_docs.sh
      - Makefile
      - source
        tts
        datasets.rst
        intro.rst
        tutorial.rst
        tacotron2.rst
        fastspeech.rst
        waveglow.rst
        models.rst
        nlp
        question_answering.rst
        ner.rst
        nlp_all_refs.bib
        intro.rst
        punctuation.rst
        bert_pretraining.rst
        dialogue_state_tracking_trade.rst
        asr-improvement.rst
        joint_intent_slot_filling.rst
        neural_machine_translation.rst
        transformer_language_model.rst
        asr
        datasets.rst
        jasper.rst
        intro.rst
        quartznet.rst
        tutorial.rst
        asr_all.bib
        models.rst
        training.rst
        api-docs
        modules.rst
        nemo.rst
        tutorials
        neuraltypes.rst
        callbacks.rst
        intro.rst
        module_custom_configuration.rst
        examples.rst
        complex_training.rst
        program_model.rst
        weightsharing.rst
        module_configuration.rst
        custommodules.rst
        collections
        core.rst
        nemo_tts.rst
        modules.rst
        nemo_nlp.rst
        nemo_asr.rst
        speech_command
        datasets.rst
        intro.rst
        quartznet.rst
        tutorial.rst
        speech_recognition_all.bib
        models.rst
        index.rst
        conf.py
- .dockerignore
- requirements
  - requirements_nlp.txt
  - requirements_test.txt
  - requirements_docs.txt
  - requirements_tts.txt
  - requirements_simple_gan.txt
  - requirements_asr.txt
  - requirements_docker.txt
  - requirements.txt
  - requirements_cv.txt
- MANIFEST.in

# Copyright (c) 2019 NVIDIA Corporation
import librosa
import matplotlib.pylab as plt
import numpy as np
import torch

from nemo.utils import logging

# Names exported by ``from <this module> import *``: the TensorBoard logging
# and evaluation-batch callbacks for WaveGlow and Tacotron 2. ``griffin_lim``
# and the plotting helpers defined further down in this file are not listed.
__all__ = [
    "waveglow_log_to_tb_func",
    "waveglow_process_eval_batch",
    "waveglow_eval_log_to_tb_func",
    "tacotron2_log_to_tb_func",
    "tacotron2_process_eval_batch",
    "tacotron2_process_final_eval",
    "tacotron2_eval_log_to_tb_func",
]


def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Reconstruct an audio signal from a magnitude spectrogram with Griffin-Lim.

    A uniformly random phase is attached to ``magnitudes`` and inverted with
    ``librosa.istft``. The phase is then re-estimated ``n_iters`` times from the
    STFT of the current signal while the magnitudes are kept fixed.

    Args:
        magnitudes: numpy array holding the magnitude spectrogram.
        n_iters: number of phase refinement iterations.
        n_fft: FFT size used by the forward STFT inside the refinement loop.

    Returns:
        The reconstructed signal, or ``np.array([0])`` when the very first
        inversion already contains non-finite samples.
    """
    random_angles = 2j * np.pi * np.random.rand(*magnitudes.shape)
    audio = librosa.istft(magnitudes * np.exp(random_angles))

    if not np.all(np.isfinite(audio)):
        logging.warning("audio was not finite, skipping audio saving")
        return np.array([0])

    remaining = n_iters
    while remaining > 0:
        # Keep the known magnitudes, only borrow the phase of the current estimate.
        estimated_phase = librosa.magphase(librosa.stft(audio, n_fft=n_fft))[1]
        audio = librosa.istft(magnitudes * estimated_phase)
        remaining -= 1
    return audio


def waveglow_log_to_tb_func(
    swriter,
    tensors,
    step,
    tag="train",
    log_images=False,
    log_images_freq=1,
    n_fft=1024,
    hop_length=256,
    window="hann",
    mel_fb=None,
):
    """
    Log the WaveGlow loss and, optionally, target/predicted mel images.

    Args:
        swriter: writer object exposing ``add_scalar`` and ``add_image``
            (presumably a TensorBoard ``SummaryWriter``).
        tensors: sequence unpacked as ``(loss, audio_pred, spec_target, mel_length)``.
            ``loss`` may be ``None`` to skip scalar logging.
        step: global step passed to the writer.
        tag: prefix for the image names.
        log_images: whether images may be logged at all.
        log_images_freq: images are logged only when ``step`` is a multiple of this.
        n_fft, hop_length, window: STFT settings used to analyse the predicted audio.
        mel_fb: optional mel filterbank; must provide ``.cpu().numpy()``. When
            ``None`` only the target spectrogram is drawn.
    """
    loss, audio_pred, spec_target, mel_length = tensors

    # Test against None explicitly: a truthiness check would silently drop a
    # loss that is exactly 0 and raises on tensors with more than one element.
    if loss is not None:
        swriter.add_scalar("loss", loss, step)

    if not (log_images and step % log_images_freq == 0):
        return

    # Only the first item of the batch is visualised, cropped to its own length.
    mel_length = mel_length[0]
    spec_target = spec_target[0].data.cpu().numpy()[:, :mel_length]
    swriter.add_image(
        f"{tag}_mel_target", plot_spectrogram_to_numpy(spec_target), step, dataformats="HWC",
    )

    if mel_fb is None:
        return

    # Re-analyse the generated waveform so it can be compared with the target mel.
    mag, _ = librosa.core.magphase(
        librosa.core.stft(
            np.nan_to_num(audio_pred[0].cpu().detach().numpy()),
            n_fft=n_fft,
            hop_length=hop_length,
            window=window,
        )
    )
    mel_pred = np.matmul(mel_fb.cpu().numpy(), mag).squeeze()
    # Clip before the log so silent bins do not produce -inf.
    log_mel_pred = np.log(np.clip(mel_pred, a_min=1e-5, a_max=None))
    swriter.add_image(
        f"{tag}_mel_predicted", plot_spectrogram_to_numpy(log_mel_pred[:, :mel_length]), step, dataformats="HWC",
    )


def waveglow_process_eval_batch(tensors: dict, global_vars: dict):
    """
    Cache the first evaluation batch for later TensorBoard logging.

    Nothing happens if ``global_vars`` already has a ``'tensorboard'`` entry.
    Otherwise an empty dict is stored there and, for every key of ``tensors``
    starting with one of the known prefixes, the first element of its value is
    saved under the matching slot name.

    Args:
        tensors: mapping from tensor names to indexable values.
        global_vars: mutable dict shared across evaluation callbacks.
    """
    if 'tensorboard' in global_vars:
        return

    prefix_to_slot = (
        ("processed_signal", 'mel_target'),
        ("audio", 'audio_pred'),
        ("processed_length", 'mel_length'),
    )
    cache = global_vars['tensorboard'] = {}
    for name, values in tensors.items():
        for prefix, slot in prefix_to_slot:
            if name.startswith(prefix):
                cache[slot] = values[0]


def waveglow_eval_log_to_tb_func(
    swriter, global_vars, step, tag=None, n_fft=1024, hop_length=256, window="hann", mel_fb=None,
):
    """
    Log the cached WaveGlow evaluation batch as images.

    Reads ``mel_target``, ``audio_pred`` and ``mel_length`` from
    ``global_vars['tensorboard']`` and forwards them to
    ``waveglow_log_to_tb_func`` with no loss and image logging enabled.

    Args:
        swriter: writer object handed through to ``waveglow_log_to_tb_func``.
        global_vars: dict holding the ``'tensorboard'`` cache.
        step: global step for the writer.
        tag: prefix for the image names.
        n_fft, hop_length, window, mel_fb: forwarded unchanged.
    """
    cached = global_vars['tensorboard']
    target_mel = cached["mel_target"]
    predicted_audio = cached["audio_pred"]
    lengths = cached['mel_length']

    stft_options = dict(n_fft=n_fft, hop_length=hop_length, window=window, mel_fb=mel_fb)
    # Loss slot is None: the evaluation path only draws images here.
    batch = [None, predicted_audio, target_mel, lengths]
    waveglow_log_to_tb_func(swriter, batch, step, tag=tag, log_images=True, **stft_options)


def _tacotron2_log_griffin_lim_audio(swriter, name, log_mel, filterbank, step, sr, n_fft, mag_scale, power):
    """Vocode one log-mel spectrogram with Griffin-Lim and write it to ``swriter`` as audio."""
    mel = np.exp(log_mel)
    magnitude = np.dot(mel, filterbank) * mag_scale
    # Forward n_fft so the STFT inside griffin_lim matches the filterbank size.
    audio = griffin_lim(magnitude.T ** power, n_fft=n_fft)
    peak = np.max(np.abs(audio))
    # griffin_lim returns a single zero sample when inversion fails; dividing by
    # a zero or non-finite peak would only write NaNs, so skip the clip instead.
    if not np.isfinite(peak) or peak == 0:
        logging.warning(f"skipping {name}: reconstructed audio is empty or not finite")
        return
    swriter.add_audio(name, audio / peak, step, sample_rate=sr)


def tacotron2_log_to_tb_func(
    swriter,
    tensors,
    step,
    tag="train",
    log_images=False,
    log_images_freq=1,
    add_audio=True,
    griffin_lim_mag_scale=1024,
    griffin_lim_power=1.2,
    sr=22050,
    n_fft=1024,
    n_mels=80,
    fmax=8000,
):
    """
    Log the Tacotron 2 loss and, optionally, diagnostic images and audio.

    Args:
        swriter: writer object exposing ``add_scalar``, ``add_image`` and
            ``add_audio`` (presumably a TensorBoard ``SummaryWriter``).
        tensors: sequence unpacked as
            ``(loss, spec_target, mel_postnet, gate, gate_target, alignments)``.
            ``loss`` may be ``None`` to skip scalar logging.
        step: global step passed to the writer.
        tag: prefix for image and audio names.
        log_images: whether images/audio may be logged at all.
        log_images_freq: images/audio are logged only when ``step`` is a
            multiple of this value.
        add_audio: also reconstruct and log audio via Griffin-Lim.
        griffin_lim_mag_scale: factor applied to the linear magnitudes.
        griffin_lim_power: exponent applied to the magnitudes before Griffin-Lim.
        sr, n_fft, n_mels, fmax: parameters of the mel filterbank built with
            ``librosa.filters.mel``; ``sr`` is also the logged sample rate.
    """
    loss, spec_target, mel_postnet, gate, gate_target, alignments = tensors

    # Test against None explicitly: a truthiness check would silently drop a
    # loss that is exactly 0 and raises on tensors with more than one element.
    if loss is not None:
        swriter.add_scalar("loss", loss, step)

    if not (log_images and step % log_images_freq == 0):
        return

    # Only the first item of the batch is visualised.
    swriter.add_image(
        f"{tag}_alignment", plot_alignment_to_numpy(alignments[0].data.cpu().numpy().T), step, dataformats="HWC",
    )
    swriter.add_image(
        f"{tag}_mel_target", plot_spectrogram_to_numpy(spec_target[0].data.cpu().numpy()), step, dataformats="HWC",
    )
    swriter.add_image(
        f"{tag}_mel_predicted",
        plot_spectrogram_to_numpy(mel_postnet[0].data.cpu().numpy()),
        step,
        dataformats="HWC",
    )
    swriter.add_image(
        f"{tag}_gate",
        plot_gate_outputs_to_numpy(gate_target[0].data.cpu().numpy(), torch.sigmoid(gate[0]).data.cpu().numpy(),),
        step,
        dataformats="HWC",
    )

    if not add_audio:
        return

    filterbank = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels, fmax=fmax)
    # Same reconstruction for prediction and ground truth so they can be compared by ear.
    for name, spectrogram in ((f"audio/{tag}_predicted", mel_postnet), (f"audio/{tag}_target", spec_target)):
        _tacotron2_log_griffin_lim_audio(
            swriter,
            name,
            spectrogram[0].data.cpu().numpy().T,
            filterbank,
            step,
            sr,
            n_fft,
            griffin_lim_mag_scale,
            griffin_lim_power,
        )


def tacotron2_process_eval_batch(tensors: dict, global_vars: dict):
    """
    Accumulate the evaluation loss and cache the first batch for TensorBoard.

    On the first call (no ``'tensorboard'`` entry in ``global_vars``) the first
    element of every value whose key starts with a known prefix is stored in
    ``global_vars['tensorboard']``. On every call the mean of the stacked loss
    tensors is appended to ``global_vars['EvalLoss']``.

    Args:
        tensors: mapping from tensor names to lists of tensors. Exactly which
            names the caller supplies is not visible here; if several keys
            start with ``"loss"`` the last one in iteration order is used.
        global_vars: mutable dict shared across evaluation callbacks.

    Raises:
        ValueError: if no key of ``tensors`` starts with ``"loss"``.
    """
    global_vars.setdefault('EvalLoss', [])

    if 'tensorboard' not in global_vars:
        prefix_to_slot = (
            ("processed_signal", 'mel_target'),
            ("mel_output", 'mel_pred'),
            ("gate_output", 'gate'),
            ("alignments", 'alignments'),
            ("gate_target", 'gate_target'),
        )
        cache = global_vars['tensorboard'] = {}
        for name, values in tensors.items():
            for prefix, slot in prefix_to_slot:
                if name.startswith(prefix):
                    cache[slot] = values[0]

    loss_key = None
    for name in tensors:
        if name.startswith("loss"):
            loss_key = name
    if loss_key is None:
        # Previously this surfaced as an UnboundLocalError on ``loss_key``.
        raise ValueError(f"no tensor whose name starts with 'loss' among: {list(tensors)}")

    global_vars['EvalLoss'].append(torch.mean(torch.stack(tensors[loss_key])))


def tacotron2_process_final_eval(global_vars: dict, tag=None):
    """
    Collapse the accumulated evaluation losses into one Python float.

    The tensors in ``global_vars['EvalLoss']`` are stacked and averaged, the
    result replaces the list under the same key, and the value is logged.

    Args:
        global_vars: dict whose ``'EvalLoss'`` entry is a sequence of tensors.
        tag: label inserted into the log message.

    Returns:
        The same ``global_vars`` dict, updated in place.
    """
    stacked_losses = torch.stack(global_vars['EvalLoss'])
    mean_loss = torch.mean(stacked_losses).item()
    global_vars.update(EvalLoss=mean_loss)
    logging.info(f"==========>>>>>>Evaluation Loss {tag}: {mean_loss}")
    return global_vars


def tacotron2_eval_log_to_tb_func(
    swriter,
    global_vars,
    step,
    tag=None,
    add_audio=True,
    griffin_lim_mag_scale=1024,
    griffin_lim_power=1.2,
    sr=22050,
    n_fft=1024,
    n_mels=80,
    fmax=8000,
):
    """
    Log the Tacotron 2 evaluation loss and the cached evaluation batch.

    Writes ``global_vars['EvalLoss']`` as the scalar ``"<tag>.loss"`` and then
    hands the tensors cached in ``global_vars['tensorboard']`` to
    ``tacotron2_log_to_tb_func`` with no loss and image logging enabled.

    Args:
        swriter: writer object exposing ``add_scalar``; also passed on.
        global_vars: dict holding ``'EvalLoss'`` and the ``'tensorboard'`` cache.
        step: global step for the writer.
        tag: prefix for scalar, image and audio names.
        add_audio, griffin_lim_mag_scale, griffin_lim_power, sr, n_fft, n_mels,
            fmax: forwarded unchanged.
    """
    cached = global_vars['tensorboard']
    # Order matches the tensor layout expected downstream, after the loss slot.
    cached_keys = ("mel_target", "mel_pred", "gate", "gate_target", "alignments")
    batch = [None]
    for key in cached_keys:
        batch.append(cached[key])

    swriter.add_scalar(f"{tag}.loss", global_vars['EvalLoss'], step)

    audio_options = dict(
        add_audio=add_audio,
        griffin_lim_mag_scale=griffin_lim_mag_scale,
        griffin_lim_power=griffin_lim_power,
        sr=sr,
        n_fft=n_fft,
        n_mels=n_mels,
        fmax=fmax,
    )
    tacotron2_log_to_tb_func(swriter, batch, step, tag=tag, log_images=True, **audio_options)


def save_figure_to_numpy(fig):
    """
    Convert an already drawn figure into an ``(height, width, 3)`` uint8 array.

    Args:
        fig: figure-like object whose ``canvas`` provides ``get_width_height()``
            and either ``tostring_rgb()`` or ``buffer_rgba()``. The canvas is
            expected to have been drawn by the caller.

    Returns:
        A writable numpy array of RGB pixels.
    """
    canvas = fig.canvas
    if hasattr(canvas, "tostring_rgb"):
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        width, height = canvas.get_width_height()
        pixels = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        # frombuffer yields a read-only view; copy to keep the result writable.
        return pixels.reshape((height, width, 3)).copy()

    # Canvases without tostring_rgb (removed in recent Matplotlib releases)
    # expose an RGBA buffer instead; drop the alpha channel.
    rgba = np.asarray(canvas.buffer_rgba(), dtype=np.uint8)
    return rgba[..., :3].copy()


def plot_alignment_to_numpy(alignment, info=None):
    """
    Render an alignment matrix as a heatmap and return it as an image array.

    Args:
        alignment: 2-D array-like drawn with ``imshow``.
        info: optional string appended below the x-axis label.

    Returns:
        The array produced by ``save_figure_to_numpy`` for the drawn figure.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    heatmap = ax.imshow(alignment, aspect='auto', origin='lower', interpolation='none')
    fig.colorbar(heatmap, ax=ax)

    x_caption = 'Decoder timestep' if info is None else 'Decoder timestep' + '\n\n' + info
    plt.xlabel(x_caption)
    plt.ylabel('Encoder timestep')
    plt.tight_layout()

    # The canvas has to be drawn before its pixel buffer can be read back.
    fig.canvas.draw()
    rendered = save_figure_to_numpy(fig)
    plt.close()
    return rendered


def plot_spectrogram_to_numpy(spectrogram):
    """
    Render a spectrogram as a heatmap and return it as an image array.

    Args:
        spectrogram: 2-D array-like drawn with ``imshow``; rows are labelled
            "Channels" and columns "Frames".

    Returns:
        The array produced by ``save_figure_to_numpy`` for the drawn figure.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    heatmap = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation='none')
    fig.colorbar(heatmap, ax=ax)
    for set_label, text in ((plt.xlabel, "Frames"), (plt.ylabel, "Channels")):
        set_label(text)
    plt.tight_layout()

    # The canvas has to be drawn before its pixel buffer can be read back.
    fig.canvas.draw()
    rendered = save_figure_to_numpy(fig)
    plt.close()
    return rendered


def plot_gate_outputs_to_numpy(gate_targets, gate_outputs):
    """
    Scatter-plot target and predicted gate values and return the image array.

    Args:
        gate_targets: sequence of target gate values, drawn as green ``+``.
        gate_outputs: sequence of predicted gate values, drawn as red ``.``.

    Returns:
        The array produced by ``save_figure_to_numpy`` for the drawn figure.
    """
    fig, ax = plt.subplots(figsize=(12, 3))

    series = (
        (gate_targets, 'green', '+', 'target'),
        (gate_outputs, 'red', '.', 'predicted'),
    )
    for values, color, marker, label in series:
        # x positions are simply the frame indices of each series.
        ax.scatter(range(len(values)), values, alpha=0.5, color=color, marker=marker, s=1, label=label)

    plt.xlabel("Frames (Green target, Red predicted)")
    plt.ylabel("Gate State")
    plt.tight_layout()

    # The canvas has to be drawn before its pixel buffer can be read back.
    fig.canvas.draw()
    rendered = save_figure_to_numpy(fig)
    plt.close()
    return rendered