Python source code of test_models.py

Project: audio (GitHub Link)

audio-master
- .github
  - pytorch-probot.yml
  - ISSUE_TEMPLATE
    - bug-report.md
    - questions-help-support.md
    - feature-request.md
    - documentation.md
- build_tools
  - travis
    - test_script.sh
    - install.sh
  - setup_helpers
    - __init__.py
    - extension.py
  - __init__.py
- .circleci
  - test
    - test_sort_yaml.py
  - config.yml.in
  - smoke_test
    - docker
      - Dockerfile
  - regenerate.py
  - config.yml
  - unittest
    - windows
      - README.md
      - scripts
        environment.yml
        setup_env.sh
        install_conda.bat
        install.sh
        run_test.sh
        post_process.sh
    - linux
      - docker
        build_and_push.sh
        scripts
        copy_kaldi_executables.sh
        build_third_parties.sh
        Dockerfile
        .gitignore
        .dockerignore
      - README.md
      - scripts
        environment.yml
        setup_env.sh
        install.sh
        run_test.sh
        run_style_checks.sh
        post_process.sh
- examples
  - interactive_asr
    - asr.py
    - data
    - __init__.py
    - README.md
    - utils.py
    - vad.py
  - test
    - test_interactive_asr.py
    - __init__.py
- third_party
  - build_codec_helper.sh
  - patch
    - libmad.patch
  - CMakeLists.txt
- LICENSE
- test
  - test_sox_compatibility.py
  - compliance
    - generate_fbank_data.py
    - __init__.py
    - utils.py
    - generate_test_stft_data.py
  - test_sox_effects.py
  - common_utils
    - backend_utils.py
    - sox_utils.py
    - test_case_utils.py
    - parameterized_utils.py
    - __init__.py
    - data_utils.py
    - wav_utils.py
  - kaldi_compatibility_impl.py
  - test_batch_consistency.py
  - test_io.py
  - torchscript_consistency_cpu_test.py
  - torchscript_consistency_impl.py
  - torchscript_consistency_cuda_test.py
  - test_compliance_kaldi.py
  - functional_cpu_test.py
  - test_dataloader.py
  - kaldi_compatibility_cpu_test.py
  - functional_cuda_test.py
  - test_models.py
  - test_transforms.py
  - test_backend.py
  - functional_impl.py
  - kaldi_compatibility_cuda_test.py
  - __init__.py
  - README.md
  - test_librosa_compatibility.py
  - test_datasets.py
  - test_kaldi_io.py
  - sox_io_backend
    - test_save.py
    - test_roundtrip.py
    - test_load.py
    - test_info.py
    - common.py
    - __init__.py
    - test_torchscript.py
  - assets
    - SpeechCommands
      - speech_commands_v0.02
        go
        0a9f9af7_nohash_0.wav
    - ARCTIC
      - cmu_us_aew_arctic
        wav
        etc
        txt.done.data
    - kaldi_file.wav
    - mat.ark
    - kaldi_test_fbank_args.json
    - LibriSpeech
      - dev-clean
        1272
        128104
        1272-128104-0000.flac
        1272-128104.trans.txt
    - whitenoise.mp3
    - kaldi_file_8000.wav
    - vec_flt.ark
    - genres
      - noise
    - vad-go-stereo-44100.wav
    - kaldi_test_mfcc_args.json
    - CommonVoice
      - cv-corpus-4-2019-12-10
        tt
        train.tsv
        clips
        common_voice_tt_00000000.mp3
    - vad-go-mono-32000.wav
    - kaldi_test_spectrogram_args.json
    - io
      - 96k_10_1ch.opus
      - 96k_0_1ch.opus
      - 96k_10_2ch.opus
      - 96k_5_2ch.opus
      - 96k_5_1ch.opus
      - generate_opus.py
      - 96k_0_2ch.opus
    - LJSpeech-1.1
      - metadata.csv
      - wavs
    - VCTK-Corpus
      - txt
        p224
        p224_002.txt
      - wav48
        p224
        p224_002.wav
    - steam-train-whistle-daniel_simon.mp3
    - kaldi
      - resample-16000-29000.ark
      - resample-16000-30000.ark
      - resample-16000-3000.ark
      - resample-16000-32000.ark
      - resample-16000-13000.ark
      - resample-16000-17000.ark
      - resample-16000-10000.ark
      - resample-16000-5000.ark
      - resample-16000-7000.ark
      - resample-16000-21000.ark
      - resample-16000-27000.ark
      - resample-16000-28000.ark
      - resample-16000-16000.ark
      - resample-16000-26000.ark
      - resample-16000-11000.ark
      - resample-16000-19000.ark
      - resample-16000-9000.ark
      - resample-16000-8000.ark
      - resample-16000-20000.ark
      - resample-16000-12000.ark
      - resample-16000-24000.ark
      - resample-16000-18000.ark
      - resample-16000-15000.ark
      - resample-16000-6000.ark
      - resample-16000-31000.ark
      - resample-16000-1000.ark
      - resample-16000-2000.ark
      - resample-16000-22000.ark
      - resample-16000-25000.ark
      - resample-16000-14000.ark
      - resample-16000-23000.ark
      - resample-16000-4000.ark
    - vec_int.ark
    - waves_yesno
      - 0_1_0_1_0_1_1_0.wav
- .clang-format
- .flake8
- torchaudio
  - compliance
    - kaldi.py
    - __init__.py
  - datasets
    - ljspeech.py
    - librispeech.py
    - speechcommands.py
    - cmuarctic.py
    - gtzan.py
    - yesno.py
    - __init__.py
    - utils.py
    - commonvoice.py
    - vctk.py
  - backend
    - sox_backend.py
    - sox_io_backend.py
    - common.py
    - __init__.py
    - utils.py
    - no_backend.py
    - soundfile_backend.py
  - models
    - __init__.py
    - _wavernn.py
    - wav2letter.py
  - extension
    - __init__.py
    - extension.py
  - functional.py
  - __init__.py
  - csrc
    - sox_effects.h
    - sox.h
    - sox_io.cpp
    - typedefs.h
    - register.cpp
    - sox_effects.cpp
    - sox_utils.cpp
    - sox_utils.h
    - sox.cpp
    - typedefs.cpp
    - sox_io.h
  - _internal
    - misc_ops.py
    - module_utils.py
    - __init__.py
  - sox_effects
    - sox_effects.py
    - __init__.py
  - transforms.py
  - kaldi_io.py
- mypy.ini
- setup.py
- .travis.yml
- README.md
- CODE_OF_CONDUCT.md
- requirements.txt
- .clang-tidy
- packaging
  - build_wheel.sh
  - pkg_helpers.bash
  - build_conda.sh
  - torchaudio
    - meta.yaml
    - build.sh
    - bld.bat
  - README.md
- .gitignore
- docs
  - Makefile
  - source
    - datasets.rst
    - compliance.kaldi.rst
    - kaldi_io.rst
    - transforms.rst
    - sox_effects.rst
    - index.rst
    - conf.py
    - models.rst
    - functional.rst
    - _static
      - img
        pytorch-logo-dark.svg
      - css
        pytorch_theme.css
  - make.bat
  - requirements.txt
- tox.ini

import torch
from torchaudio.models import Wav2Letter, _MelResNet, _UpsampleNetwork

from . import common_utils


class TestWav2Letter(common_utils.TorchaudioTestCase):
    """Output-shape checks for ``Wav2Letter`` on the two input setups used here."""

    def test_waveform(self):
        """Run a random single-feature batch through a model built without ``input_type``.
        """
        n_batch, n_features, n_classes, n_frames = 2, 1, 40, 320

        # Build the model before drawing the input, mirroring the usual
        # "construct, then feed" order of these tests.
        net = Wav2Letter(num_classes=n_classes, num_features=n_features)
        prediction = net(torch.rand(n_batch, n_features, n_frames))

        # For this input length the test expects 2 steps on the last axis.
        expected_shape = (n_batch, n_classes, 2)
        assert prediction.size() == expected_shape

    def test_mfcc(self):
        """Run a random 13-feature batch through a model built with ``input_type="mfcc"``.
        """
        n_batch, n_features, n_classes, n_frames = 2, 13, 40, 2

        net = Wav2Letter(num_classes=n_classes, input_type="mfcc", num_features=n_features)
        prediction = net(torch.rand(n_batch, n_features, n_frames))

        # Only the output size is verified, not the values.
        expected_shape = (n_batch, n_classes, 2)
        assert prediction.size() == expected_shape


class TestMelResNet(common_utils.TorchaudioTestCase):
    """Output-shape check for the ``_MelResNet`` block."""

    def test_waveform(self):
        """Check the size of the tensor produced by a _MelResNet block.
        """

        # Input dimensions.
        n_batch, n_freq, n_time = 2, 100, 200
        # Block configuration, passed positionally to the constructor below.
        n_res_block, n_hidden, n_output, kernel_size = 10, 128, 128, 5

        net = _MelResNet(n_res_block, n_freq, n_hidden, n_output, kernel_size)
        result = net(torch.rand(n_batch, n_freq, n_time))

        # The expected time axis is shorter than the input by kernel_size - 1.
        expected_time = n_time - kernel_size + 1
        assert result.size() == (n_batch, n_output, expected_time)


class TestUpsampleNetwork(common_utils.TorchaudioTestCase):
    """Output-shape check for the ``_UpsampleNetwork`` block."""

    def test_waveform(self):
        """Check the sizes of both tensors returned by a _UpsampleNetwork block.
        """

        upsample_scales = [5, 5, 8]
        # Input dimensions.
        n_batch, n_freq, n_time = 2, 100, 200
        # Block configuration, passed positionally to the constructor below.
        n_res_block, n_hidden, n_output, kernel_size = 10, 128, 256, 5

        # Overall upsampling factor is the product of the per-stage scales.
        scale_product = 1
        for factor in upsample_scales:
            scale_product = scale_product * factor

        net = _UpsampleNetwork(upsample_scales, n_res_block, n_freq, n_hidden, n_output, kernel_size)
        first, second = net(torch.rand(n_batch, n_freq, n_time))

        # Both outputs are expected to share the same time length: the input
        # length reduced by kernel_size - 1, then multiplied by the total scale.
        expected_time = scale_product * (n_time - kernel_size + 1)
        assert first.size() == (n_batch, n_freq, expected_time)
        assert second.size() == (n_batch, n_output, expected_time)