python source code of onsets_frames_transcription

magenta-master
- ci-install.sh
- AUTHORS
- .gitmodules
- LICENSE
- ci-script.sh
- .pylintrc
- setup.py
- magenta
  - music
    - __init__.py
  - testdata
    - tfrecord_iterator_test.tfrecord
    - example.mid
  - tensor2tensor
    - t2t_trainer.py
    - t2t_datagen.py
    - models.py
    - __init__.py
    - t2t_decoder.py
    - problems.py
  - version.py
  - models
    - latent_transfer
      - train_joint.py
      - model_dataspace.py
      - train_dataspace_classifier.py
      - interpolate_joint.py
      - nn.py
      - model_joint.py
      - sample_dataspace.py
      - train_dataspace.py
      - common_joint.py
      - local_mnist.py
      - configs
        fashion_mnist_0_nlatent100.py
        wavegan.py
        fashion_mnist_0_nlatent64.py
        mnist_0_nlatent100.py
        joint_exp_mnist2fashion_parameterized.py
        joint_exp_mnist100_2wavegan_parameterized.py
        joint_exp_mnist2wavegan_parameterized.py
        joint_exp_2mnist_parameterized.py
        joint_exp_2fashion_parameterized.py
        fashion_mnist_classifier_0.py
        __init__.py
        mnist_classifier_0.py
        mnist_0_nlatent64.py
      - common.py
      - __init__.py
      - encode_dataspace.py
      - sample_wavegan.py
    - pianoroll_rnn_nade
      - pianoroll_rnn_nade_create_dataset_test.py
      - pianoroll_rnn_nade_train.py
      - pianoroll_rnn_nade_graph.py
      - pianoroll_rnn_nade_create_dataset.py
      - pianoroll_rnn_nade_generate.py
      - pianoroll_rnn_nade_sequence_generator.py
      - pianoroll_rnn_nade_model.py
      - __init__.py
      - README.md
    - polyphony_rnn
      - polyphony_encoder_decoder_test.py
      - polyphony_rnn_create_dataset_test.py
      - polyphony_lib_test.py
      - polyphony_encoder_decoder.py
      - polyphony_model.py
      - polyphony_rnn_pipeline.py
      - polyphony_rnn_create_dataset.py
      - polyphony_rnn_train.py
      - polyphony_rnn_generate.py
      - polyphony_lib.py
      - __init__.py
      - README.md
      - polyphony_sequence_generator.py
    - coconet
      - lib_sampling.py
      - lib_tfutil.py
      - samples
        generated_result.npy
      - lib_tfsampling.py
      - lib_hparams.py
      - lib_graph.py
      - lib_logging.py
      - export_saved_model_test.py
      - testdata
        TestData.npz
      - train_bazel.sh
      - coconet_evaluate.py
      - lib_mask.py
      - lib_data.py
      - lib_util.py
      - lib_evaluation.py
      - sample_bazel.sh
      - evalsample_bazel.sh
      - __init__.py
      - README.md
      - coconet_sample.py
      - coconet_train.py
      - export_saved_model.py
      - evalmodel_bazel.sh
      - lib_saved_model.py
      - lib_pianoroll.py
    - drums_rnn
      - drums_rnn_generate.py
      - drums_rnn_create_dataset.py
      - drums_rnn_train.py
      - drums_rnn_pipeline.py
      - drums_rnn_config_flags.py
      - drums_rnn_model.py
      - drums_rnn_create_dataset_test.py
      - __init__.py
      - README.md
      - drums_rnn_sequence_generator.py
    - onsets_frames_transcription
      - data.py
      - infer.py
      - onsets_frames_transcription_spectrogram_json.py
      - configs.py
      - data_test.py
      - model_tpu.py
      - create_dataset.py
      - realtime
        onsets_frames_transcription_realtime.py
        __init__.py
        README.md
        audio_recorder.py
        tflite_model.py
      - onsets_frames_transcription_transcribe.py
      - metrics_test.py
      - audio_label_data_utils.py
      - audio_transform.py
      - metrics.py
      - onsets_frames_transcription_create_tfrecords.py
      - mfcc_mel.py
      - onsets_frames_transcription_create_dataset_maps.py
      - melspec_input.py
      - mfcc_mel_test.py
      - infer_util.py
      - constants.py
      - model.py
      - infer_util_test.py
      - onsets_frames_transcription_train.py
      - audio_label_data_utils_test.py
      - melspec_input_test.py
      - onsets_frames_transcription_infer.py
      - __init__.py
      - README.md
      - create_dataset_lib_test.py
      - create_dataset_lib.py
      - train_util.py
      - onsets_frames_transcription_create_dataset.py
      - estimator_spec_util.py
      - drum_mappings.py
    - melody_rnn
      - melody_rnn_sequence_generator.py
      - melody_rnn_model.py
      - melody_rnn_generate.py
      - melody_rnn_config_flags.py
      - melody_rnn_create_dataset.py
      - melody_rnn_pipeline.py
      - __init__.py
      - README.md
      - primer.mid
      - melody_rnn_train.py
      - melody_rnn_create_dataset_test.py
    - sketch_rnn
      - model.py
      - sketch_rnn_train.py
      - __init__.py
      - README.md
      - utils.py
      - rnn.py
      - assets
        data_format.svg
        catbus.svg
    - music_vae
      - data.py
      - music_vae_train.py
      - configs.py
      - lstm_utils_test.py
      - data_test.py
      - trained_model.py
      - lstm_models.py
      - data_hierarchical_test.py
      - base_model.py
      - data_hierarchical.py
      - lstm_utils.py
      - __init__.py
      - README.md
      - js
        README.md
      - music_vae_generate.py
    - svg_vae
      - svg_decoder_loss.py
      - svg_decoder.py
      - glyphazzn.py
      - image_vae.py
      - svg_utils.py
      - __init__.py
      - datagen_beam.py
      - README.md
    - score2perf
      - transformer_autoencoder.py
      - modalities_test.py
      - score2perf_hparams.py
      - score2perf.py
      - datagen_beam_test.py
      - music_encoders.py
      - __init__.py
      - datagen_beam.py
      - README.md
      - music_encoders_test.py
      - modalities.py
    - arbitrary_image_stylization
      - arbitrary_image_stylization_losses.py
      - arbitrary_image_stylization_train_mobile.py
      - arbitrary_image_stylization_distill_mobilenet.py
      - images
        content_images
        README.md
        stylized_images_interpolation
        stylized_cobwebbed_images
        stylized_images
        style_images
        README.md
      - nza_model.py
      - arbitrary_image_stylization_build_model.py
      - arbitrary_image_stylization_build_mobilenet_model.py
      - arbitrary_image_stylization_train.py
      - arbitrary_image_stylization_convert_tflite.py
      - export_hub.py
      - __init__.py
      - README.md
      - arbitrary_image_stylization_with_weights.py
      - arbitrary_image_stylization_evaluate.py
    - image_stylization
      - vgg.py
      - sample_images
      - image_stylization_convert_tflite.py
      - ops.py
      - image_stylization_finetune.py
      - image_stylization_create_dataset.py
      - image_utils.py
      - model.py
      - imagenet_data.py
      - evaluation_images
      - image_stylization_train.py
      - __init__.py
      - README.md
      - learning.py
      - image_stylization_evaluate.py
      - image_stylization_transform.py
    - __init__.py
    - README.md
    - improv_rnn
      - improv_rnn_train.py
      - improv_rnn_config_flags.py
      - improv_rnn_model.py
      - improv_rnn_generate.py
      - improv_rnn_create_dataset.py
      - improv_rnn_pipeline.py
      - __init__.py
      - README.md
      - improv_rnn_create_dataset_test.py
      - improv_rnn_sequence_generator.py
    - shared
      - sequence_generator.py
      - sequence_generator_bundle.py
      - events_rnn_graph.py
      - events_rnn_train.py
      - events_rnn_graph_test.py
      - model.py
      - __init__.py
      - sequence_generator_test.py
      - events_rnn_model.py
    - rl_tuner
      - rl_tuner_train.py
      - rl_tuner_eval_metrics.py
      - rl_tuner_test.py
      - note_rnn_loader.py
      - __init__.py
      - README.md
      - rl_tuner_ops.py
      - rl_tuner.py
    - nsynth
      - reader.py
      - wavenet
        fastgen.py
        nsynth_save_embeddings.py
        nsynth_generate.py
        train.py
        __init__.py
        h512_bo16.py
        fastgen_test.py
        masked.py
      - baseline
        models
        ae_configs
        nfft_1024.py
        __init__.py
        ae.py
        __init__.py
        save_embeddings.py
        train.py
        __init__.py
      - __init__.py
      - README.md
      - utils.py
    - gansynth
      - gansynth_train.py
      - lib
        specgrams_helper.py
        generate_util.py
        util.py
        spectral_ops.py
        network_functions.py
        flags.py
        model.py
        datasets.py
        layers.py
        __init__.py
        data_normalizer.py
        data_helpers.py
        train_util.py
        networks.py
        spectral_ops_test.py
        specgrams_helper_test.py
      - gansynth_generate.py
      - configs
        mel_prog_hires.py
        __init__.py
      - __init__.py
      - README.md
    - performance_rnn
      - performance_model.py
      - performance_rnn_generate.py
      - performance_rnn_create_dataset.py
      - performance_rnn_train.py
      - performance_rnn_create_dataset_test.py
      - __init__.py
      - README.md
      - performance_sequence_generator.py
    - piano_genie
      - util.py
      - configs.py
      - eval.py
      - gold.py
      - model.py
      - train.py
      - __init__.py
      - README.md
      - loader.py
  - common
    - sequence_example_lib.py
    - concurrency_test.py
    - state_util_test.py
    - state_util.py
    - tf_utils.py
    - testing_lib.py
    - nade_test.py
    - __init__.py
    - concurrency.py
    - beam_search.py
    - beam_search_test.py
    - nade.py
  - reviews
    - pixelrnn.md
    - summary_generation_sequences.md
    - styletransfer.md
    - GAN.md
    - draw.md
    - rnnrbm.md
    - README.md
    - assets
      - gan
  - pipelines
    - statistics.py
    - drum_pipelines.py
    - lead_sheet_pipelines_test.py
    - pianoroll_pipeline_test.py
    - performance_pipeline_test.py
    - pipeline.py
    - dag_pipeline.py
    - chord_pipelines_test.py
    - pianoroll_pipeline.py
    - performance_pipeline.py
    - note_sequence_pipelines_test.py
    - melody_pipelines_test.py
    - chord_pipelines.py
    - melody_pipelines.py
    - pipelines_common_test.py
    - statistics_test.py
    - dag_pipeline_test.py
    - drum_pipelines_test.py
    - pipeline_test.py
    - __init__.py
    - note_sequence_pipelines.py
    - README.md
    - event_sequence_pipeline.py
    - pipelines_common.py
    - lead_sheet_pipelines.py
  - __init__.py
  - scripts
    - abc_compare.py
    - unpack_bundle.py
    - convert_dir_to_note_sequences_test.py
    - convert_dir_to_note_sequences.py
    - __init__.py
    - README.md
  - tools
    - magenta-install.sh
  - js
    - README.md
  - interfaces
    - midi
      - magenta_midi.py
      - midi_interaction.py
      - midi_clock.py
      - midi_hub_test.py
      - __init__.py
      - README.md
      - midi_hub.py
    - __init__.py
  - contrib
    - seq2seq_test.py
    - cudnn_rnn.py
    - training.py
    - rnn_test.py
    - __init__.py
    - seq2seq.py
    - rnn.py
  - video
    - next_frame_prediction_pix2pix
      - join_pairs.py
      - create_video.sh
      - recursion_640.sh
      - README.md
    - __init__.py
    - tools
      - extract_multiple_video.sh
      - extract_frames.py
      - convert2jpg.py
      - concat_mp4.sh
      - __init__.py
      - create_mp4.sh
      - random_pick.py
- demos
  - README.md
- setup.cfg
- .travis.yml
- README.md
- .isort.cfg
- .gitignore

# Copyright 2020 The Magenta Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Transcribe a recording of piano audio."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from magenta.models.onsets_frames_transcription import audio_label_data_utils
from magenta.models.onsets_frames_transcription import configs
from magenta.models.onsets_frames_transcription import data
from magenta.models.onsets_frames_transcription import infer_util
from magenta.models.onsets_frames_transcription import train_util
from note_seq import midi_io
from note_seq.protobuf import music_pb2
import six
import tensorflow.compat.v1 as tf

# Command-line flags consumed by `run` below.
FLAGS = tf.app.flags.FLAGS

# Model selection and checkpoint location.
tf.app.flags.DEFINE_string(
    name='config',
    default='onsets_frames',
    help='Name of the config to use.')
tf.app.flags.DEFINE_string(
    name='model_dir',
    default=None,
    help='Path to look for acoustic checkpoints.')
tf.app.flags.DEFINE_string(
    name='checkpoint_path',
    default=None,
    help=(
        'Filename of the checkpoint to use. If not specified, will use the latest '
        'checkpoint'))

# Hyperparameter overrides and audio loading.
tf.app.flags.DEFINE_string(
    name='hparams',
    default='',
    help='A comma-separated list of `name=value` hyperparameter values.')
tf.app.flags.DEFINE_boolean(
    name='load_audio_with_librosa',
    default=False,
    help=(
        'Whether to use librosa for sampling audio (required for 24-bit audio)'))

# Output naming and logging.
tf.app.flags.DEFINE_string(
    name='transcribed_file_suffix',
    default='',
    help='Optional suffix to add to transcribed files.')
tf.app.flags.DEFINE_string(
    name='log',
    default='INFO',
    help=(
        'The threshold for what messages will be logged: '
        'DEBUG, INFO, WARN, ERROR, or FATAL.'))


def create_example(filename, sample_rate, load_audio_with_librosa):
  """Processes an audio file into a serialized Example proto.

  Args:
    filename: Path of the audio file to read. It is opened through `tf.gfile`
      and also used (as text) for the example id.
    sample_rate: Sample rate forwarded to
      `audio_label_data_utils.process_record`.
    load_audio_with_librosa: Forwarded to `process_record`; per the
      `--load_audio_with_librosa` flag help this is required for 24-bit audio.

  Returns:
    The single record produced for the file, serialized with
    `SerializeToString()`.

  Raises:
    AssertionError: If `process_record` does not yield exactly one record.
  """
  # Read inside a context manager so the file handle is always closed, even
  # if the read fails part-way through.
  with tf.gfile.Open(filename, 'rb') as audio_file:
    wav_data = audio_file.read()

  example_list = list(
      audio_label_data_utils.process_record(
          wav_data=wav_data,
          sample_rate=sample_rate,
          # There are no labels at transcription time, so pass an empty
          # sequence and allow it explicitly below.
          ns=music_pb2.NoteSequence(),
          # decode to handle filenames with extended characters.
          example_id=six.ensure_text(filename, 'utf-8'),
          min_length=0,
          max_length=-1,
          allow_empty_notesequence=True,
          load_audio_with_librosa=load_audio_with_librosa))
  # The caller feeds exactly one example per file, so anything else is an
  # internal invariant violation.
  assert len(example_list) == 1
  return example_list[0].SerializeToString()


def run(argv, config_map, data_fn):
  """Transcribes each audio file listed in `argv` and writes a MIDI file.

  For every path in `argv[1:]` the audio is turned into an Example proto, fed
  through the dataset built by `data_fn`, and passed to the estimator created
  from the selected config. The predicted NoteSequence is written to
  `filename + FLAGS.transcribed_file_suffix + '.midi'`.

  Args:
    argv: Command-line arguments. `argv[0]` is skipped; every remaining entry
      is treated as the path of an audio file to transcribe.
    config_map: Mapping indexed by `FLAGS.config`; the selected value must
      expose `hparams` and `model_fn`.
    data_fn: Callable that builds the preprocessing dataset. It is invoked
      with the keyword arguments `examples`, `preprocess_examples`, `params`,
      `is_training`, `shuffle_examples` and `skip_n_initial_records`.

  Raises:
    ValueError: If neither `--model_dir` nor `--checkpoint_path` is set.
    AssertionError: If inference does not return exactly one prediction.
  """
  tf.logging.set_verbosity(FLAGS.log)

  config = config_map[FLAGS.config]
  hparams = config.hparams
  hparams.parse(FLAGS.hparams)
  # Each file is transcribed on its own.
  hparams.batch_size = 1
  # NOTE(review): 0 presumably disables truncation so the whole file is
  # transcribed -- confirm against the data pipeline.
  hparams.truncated_length_secs = 0

  # --model_dir defaults to None. Fail with a clear message instead of letting
  # `os.path.expanduser(None)` raise an opaque TypeError.
  if FLAGS.model_dir is None and not FLAGS.checkpoint_path:
    raise ValueError(
        'At least one of --model_dir or --checkpoint_path must be specified.')

  # NOTE(review): when only --checkpoint_path is given, model_dir stays None
  # and is passed through to `train_util.create_estimator` -- confirm that it
  # accepts None.
  model_dir = FLAGS.model_dir
  if model_dir is not None:
    model_dir = os.path.expanduser(model_dir)

  # The checkpoint does not depend on the input file, so resolve it once
  # rather than on every loop iteration.
  checkpoint_path = None
  if FLAGS.checkpoint_path:
    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)

  with tf.Graph().as_default():
    examples = tf.placeholder(tf.string, [None])

    dataset = data_fn(
        examples=examples,
        preprocess_examples=True,
        params=hparams,
        is_training=False,
        shuffle_examples=False,
        skip_n_initial_records=0)

    estimator = train_util.create_estimator(config.model_fn, model_dir,
                                            hparams)

    iterator = dataset.make_initializable_iterator()
    next_record = iterator.get_next()

    with tf.Session() as sess:
      sess.run([
          tf.initializers.global_variables(),
          tf.initializers.local_variables()
      ])

      for filename in argv[1:]:
        tf.logging.info('Starting transcription for %s...', filename)

        # The reason we bounce between two Dataset objects is so we can use
        # the data processing functionality in data.py without having to
        # construct all the Example protos in memory ahead of time or create
        # a temporary tfrecord file.
        tf.logging.info('Processing file...')
        sess.run(iterator.initializer,
                 {examples: [
                     create_example(filename, hparams.sample_rate,
                                    FLAGS.load_audio_with_librosa)]})

        # Defined per file: it pulls the record that was just loaded into the
        # iterator above and is consumed immediately by `estimator.predict`.
        def transcription_data(params):
          del params
          return tf.data.Dataset.from_tensors(sess.run(next_record))
        input_fn = infer_util.labels_to_features_wrapper(transcription_data)

        tf.logging.info('Running inference...')
        prediction_list = list(
            estimator.predict(
                input_fn,
                checkpoint_path=checkpoint_path,
                yield_single_examples=False))
        # One input record with batch_size == 1 must give one prediction.
        assert len(prediction_list) == 1

        sequence_prediction = music_pb2.NoteSequence.FromString(
            prediction_list[0]['sequence_predictions'][0])

        midi_filename = filename + FLAGS.transcribed_file_suffix + '.midi'
        midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)

        tf.logging.info('Transcription written to %s.', midi_filename)


def main(argv):
  """Runs transcription using the module's config map and batch provider."""
  run(argv=argv, data_fn=data.provide_batch, config_map=configs.CONFIG_MAP)


def console_entry_point():
  """Disables TF2 behavior, then hands `main` to `tf.app.run`."""
  # Must happen before any graph is built: the script uses the TF1-style
  # placeholder/session APIs.
  tf.disable_v2_behavior()
  tf.app.run(main=main)

# Allow running this module directly as a script.
if __name__ == '__main__':
  console_entry_point()