# Copyright 2016 Conchylicultor. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""
The batch builder converts the songs into data readable by the neural networks.
Used for training, testing and generating
"""

import random  # Shuffling
import operator  # Multi-level sorting
import json  # Loading the generation initiator file (get_batches_test)

import numpy as np

import deepmusic.songstruct as music


class Batch:
    """Structure containing batches info
    Should be in a tf placeholder compatible format
    """
    def __init__(self):
        self.inputs = []   # list[np.array]: network input, one array per timestep
        self.targets = []  # list[np.array]: expected output (shifted by one timestep)

    def generate(self, target=True):
        """ Is called just before feeding the placeholder, allows additional pre-processing

        Args:
            target (bool): is true if the batch also needs to generate the target
        """
        pass


class BatchBuilder:
    """ Class which creates and manages batches

    Batches are created from the songs.

    Defines the song representation input (and output) format so the network
    must support the format.

    The class has the choice to either entirely create the batches when
    get_list is called or to create the batches as the training progresses
    (more memory efficient)
    """
    # TODO: Should add an option to pre-compute a lot of batches and
    # cache them on the hard drive
    # TODO: For generating mode, add another function
    # TODO: Add a function to get the length too (for tqdm when generators)?

    def __init__(self, args):
        """
        Args:
            args: parsed parameters (must provide batch_size and sample_length)
        """
        self.args = args

    @staticmethod
    def get_module_id():
        """ Return the unique id associated with the builder
        Ultimately, the id will be used for saving/loading the dataset, and
        as parameter argument.
        Returns:
            str: The name of the builder
        """
        raise NotImplementedError('Abstract class')

    def get_list(self, dataset, name):
        """ Compute the batches for the current epoch
        Is called twice (for training and testing)
        Args:
            dataset (list[Objects]): the training/testing set
            name (str): indicate the dataset type
        Return:
            list[Batch]: the batches to process
        """
        raise NotImplementedError('Abstract class')

    def build_next(self, batch):
        """ In case of a generator (batches non precomputed), compute the batch
        given the batch id passed
        Args:
            batch: the current testing or training batch or id of batch to generate
        Return:
            Batch: the computed batch
        """
        # TODO: Unused function. Instead Batch.generate does the same thing. Is it
        # a good idea? Probably not. Instead should prefer this factory function
        return batch

    def build_placeholder_input(self):
        """ Create a placeholder compatible with the batch input
        Allows to control the dimensions
        Return:
            tf.placeholder: the placeholder for a single timestep
        """
        raise NotImplementedError('Abstract class')

    def build_placeholder_target(self):
        """ Create a placeholder compatible with the target
        Allows to control the dimensions
        Return:
            tf.placeholder: the placeholder for a single timestep
        """
        # TODO: The target also depends on the loss function (sigmoid, softmax,...).
        # How to redefine that?
        raise NotImplementedError('Abstract class')

    def process_song(self, song):
        """ Apply some pre-processing to the songs so the song already gets the
        right input representation.
        Do it once globally for all songs
        Args:
            song (Song): the training/testing set
        Return:
            Object: the song after formatting
        """
        return song  # By default no pre-processing

    def reconstruct_song(self, song):
        """ Reconstruct the original raw song from the preprocessed data
        We should have:

        reconstruct_song(process_song(my_song)) == my_song

        Args:
            song (Object): the training/testing set
        Return:
            Song: the song after formatting
        """
        return song  # By default no pre-processing

    def process_batch(self, raw_song):
        """ Create the batch associated with the song
        Called when generating songs to create the initial input batch
        Args:
            raw_song (Song): The song to convert
        Return:
            Batch
        """
        raise NotImplementedError('Abstract class')

    def reconstruct_batch(self, output, batch_id, chosen_labels=None):
        """ Create the song associated with the network output
        Args:
            output (list[np.Array]): The output of the network (size batch_size*output_dim)
            batch_id (int): The batch that we must reconstruct
            chosen_labels (list[np.Array[batch_size, int]]): the sampled class at each
                timestep (useful to reconstruct the generated song)
        Return:
            Song: The reconstructed song
        """
        raise NotImplementedError('Abstract class')

    def get_input_dim(self):  # Fixed: 'self' was missing from the original declaration
        """ Return the input dimension
        Return:
            int:
        """
        raise NotImplementedError('Abstract class')


class Relative(BatchBuilder):
    """ Prepare the batches for the current epoch.
    Generate batches of the form:
        12 values for relative position with previous notes (modulo 12)
        14 values for the relative pitch (+/-7)
        12 values for the relative positions with the previous note
    """
    NB_NOTES_SCALE = 12
    OFFSET_SCALES = 0  # Start at A0
    NB_SCALES = 7  # Up to G7 (Official order is A6, B6, C7, D7, E7,... G7)

    # Experiments on the relative note representation:
    # Experiment 1:
    #   * As baseline, we only project the note on one scale (C5: 51)
    BASELINE_OFFSET = 51

    # Options:
    # * Note absolute (A,B,C,...G) vs relative ((current-prev)%12)
    NOTE_ABSOLUTE = False
    # * Use separation token between the notes (a note with class_pitch=-1 is a separation token)
    HAS_EMPTY = True

    class RelativeNote:
        """ Struct which defines a note in a relative way with respect to
        the previous note
        Can only play 7 octaves (so the upper and lower notes of the
        piano are never reached (not that important in practice))
        """
        def __init__(self):
            # TODO: Should the network get some information about the absolute pitch?
            # Another way could be to always start with a note from the base
            # TODO: Define behavior when saturating
            # TODO: Try with time relative to prev vs next
            # TODO: Try to randomly permute chords vs low to high pitch
            # TODO: Try pitch %7 vs fixed +/-7
            # TODO: Try to add a channel number for each note (2 class SoftMax)
            # <= Would require a clean database where the melody/bass are clearly separated
            self.pitch_class = 0  # A, B, C,... +/- %12 (None encodes the separation token)
            self.scale = 0  # Octave +/- % 7
            self.prev_tick = 0  # Distance from previous note (from -0 up to -MAXIMUM_SONG_RESOLUTION*NOTES_PER_BAR (=1 bar))

    class RelativeSong:
        """ Struct which defines a song in a relative way (intern class format)
        Can only play 7 octaves (so the upper and lower notes of the
        piano are never reached (not that important in practice))
        """
        def __init__(self):
            """ All attributes are defined with respect to the previous one
            """
            self.first_note = None  # Defines the reference note
            self.notes = []  # list[RelativeNote]

    class RelativeBatch(Batch):
        """ Struct which contains temporary information necessary to reconstruct
        the batch
        """
        class SongExtract:
            # Defines a subsong: a half-open window [begin, end) over song.notes
            def __init__(self):
                self.song = None  # The song reference
                self.begin = 0
                self.end = 0

        def __init__(self, extracts):
            """
            Args:
                extracts (list[SongExtract]): Should be of length batch_size, or
                    at least all of the same size
            """
            super().__init__()
            self.extracts = extracts

        def generate(self, target=True):
            """ Materialize self.inputs (and optionally self.targets) from the extracts
            Args:
                target (bool): is true if the batch also needs to generate the target
            """
            # TODO: Could potentially be optimized (one big numpy array initialized
            # only once, each input is a sub-array)
            # TODO: Those inputs should be cleared once the training pass has been run
            # (Use class with generator, __next__ and __len__)
            sequence_length = self.extracts[0].end - self.extracts[0].begin
            shape_input = (len(self.extracts), Relative.RelativeBatch.get_input_dim())  # (batch_size, note_space) +1 because of the <next> token

            def gen_input(i):
                array = np.zeros(shape_input)
                for j, extract in enumerate(self.extracts):  # Iterate over the batches
                    # Set the one-hot vector (choose label between <next>,A,...,G)
                    label = extract.song.notes[extract.begin + i].pitch_class
                    # Fixed: compare against None. The previous 'not label' test also
                    # matched pitch_class == 0 (a valid pitch class), wrongly encoding
                    # it as the <next> separator token
                    array[j, 0 if label is None else label + 1] = 1
                return array

            def gen_target(i):
                # TODO: Could merge with the previous function to optimize the calls
                array = np.zeros([len(self.extracts)], dtype=int)  # Int for SoftMax compatibility
                for j, extract in enumerate(self.extracts):  # Iterate over the batches
                    # Set the one-hot label (choose label between <next>,A,...,G)
                    label = extract.song.notes[extract.begin + i + 1].pitch_class  # Warning: +1 because targets are shifted with respect to the inputs
                    array[j] = 0 if label is None else label + 1  # Fixed: same None check as in gen_input
                return array

            self.inputs = [gen_input(i) for i in range(sequence_length)]  # Generate each input sequence
            if target:
                self.targets = [gen_target(i) for i in range(sequence_length)]

        @staticmethod
        def get_input_dim():
            """ Size of the one-hot vector for a single timestep """
            # TODO: Refactoring. Where to place this function? Should be accessible
            # from model and batch and depend on batch_builder, also used in
            # enco/deco modules. Ideally should not be static
            return 1 + Relative.NB_NOTES_SCALE  # +1 because of the <next> token

    def __init__(self, args):
        super().__init__(args)

    @staticmethod
    def get_module_id():
        return 'relative'

    def process_song(self, old_song):
        """ Pre-process the data once globally
        Do it once globally.
        Args:
            old_song (Song): original song
        Returns:
            list[RelativeSong]: the new formatted song
        """
        new_song = Relative.RelativeSong()

        old_song.normalize()

        # Gather all notes and sort them by absolute time
        all_notes = []
        for track in old_song.tracks:
            for note in track.notes:
                all_notes.append(note)
        all_notes.sort(key=operator.attrgetter('tick', 'note'))  # Sort first by tick, then by pitch

        # Compute the relative position for each note
        prev_note = all_notes[0]
        new_song.first_note = prev_note
        # TODO: What if the song starts with a chord?
        for note in all_notes[1:]:
            # Check if we should insert an empty token
            temporal_distance = note.tick - prev_note.tick
            assert temporal_distance >= 0
            if Relative.HAS_EMPTY and temporal_distance > 0:
                # One separator token per elapsed tick
                for i in range(temporal_distance):
                    separator = Relative.RelativeNote()  # Separation token
                    separator.pitch_class = None
                    new_song.notes.append(separator)

            # Insert the new relative note
            new_note = Relative.RelativeNote()
            if Relative.NOTE_ABSOLUTE:
                new_note.pitch_class = note.note % Relative.NB_NOTES_SCALE
            else:
                new_note.pitch_class = (note.note - prev_note.note) % Relative.NB_NOTES_SCALE
            new_note.scale = (note.note//Relative.NB_NOTES_SCALE - prev_note.note//Relative.NB_NOTES_SCALE) % Relative.NB_SCALES  # TODO: add offset for the notes? (where does the game begin?)
            new_note.prev_tick = temporal_distance
            new_song.notes.append(new_note)

            prev_note = note
        return new_song

    def reconstruct_song(self, rel_song):
        """ Reconstruct the original raw song from the preprocessed data
        See parent class for details

        Some information will be lost compared to the original song:
          * Only one track left
          * Original tempo lost

        Args:
            rel_song (RelativeSong): the song to reconstruct
        Return:
            Song: the reconstructed song
        """
        raw_song = music.Song()
        main_track = music.Track()

        prev_note = rel_song.first_note
        main_track.notes.append(rel_song.first_note)
        current_tick = rel_song.first_note.tick
        for next_note in rel_song.notes:
            # Case of separator: just advance the clock
            if next_note.pitch_class is None:
                current_tick += 1
                continue

            # Adding the new note
            new_note = music.Note()
            # * Note
            if Relative.NOTE_ABSOLUTE:
                new_note.note = Relative.BASELINE_OFFSET + next_note.pitch_class
            else:
                new_note.note = Relative.BASELINE_OFFSET + ((prev_note.note-Relative.BASELINE_OFFSET) + next_note.pitch_class) % Relative.NB_NOTES_SCALE
            # * Tick
            if Relative.HAS_EMPTY:
                new_note.tick = current_tick
            else:
                new_note.tick = prev_note.tick + next_note.prev_tick
            # * Scale
            # ...
            main_track.notes.append(new_note)
            prev_note = new_note

        raw_song.tracks.append(main_track)
        raw_song.normalize(inverse=True)
        return raw_song

    def process_batch(self, raw_song):
        """ Create the batch associated with the song
        Args:
            raw_song (Song): The song to convert
        Return:
            RelativeBatch
        """
        processed_song = self.process_song(raw_song)
        extract = self.create_extract(processed_song, 0, len(processed_song.notes))
        batch = Relative.RelativeBatch([extract])
        return batch

    def reconstruct_batch(self, output, batch_id, chosen_labels=None):
        """ Create the song associated with the network output
        Args:
            output (list[np.Array]): The output of the network (size batch_size*output_dim)
            batch_id (int): The batch id
            chosen_labels (list[np.Array[batch_size, int]]): the sampled class at each
                timestep (useful to reconstruct the generated song)
        Return:
            Song: The reconstructed song
        """
        assert Relative.HAS_EMPTY  # Reconstruction below relies on the separator tokens

        processed_song = Relative.RelativeSong()
        processed_song.first_note = music.Note()
        processed_song.first_note.note = 56  # TODO: Define what should be the first note
        print('Reconstruct')
        for i, note in enumerate(output):
            relative = Relative.RelativeNote()
            # Here, if we did sample the output, we should get which one was selected
            if not chosen_labels or i == len(chosen_labels):  # If chosen_labels, the last generated note has not been sampled
                chosen_label = int(np.argmax(note[batch_id,:]))  # Cast np.int64 to int to avoid compatibility issues with mido
            else:
                chosen_label = int(chosen_labels[i][batch_id])
            print(chosen_label, end=' ')  # TODO: Add a text output connector
            if chosen_label == 0:  # <next> token
                relative.pitch_class = None
                #relative.scale =  # Not used
                #relative.prev_tick =
            else:
                relative.pitch_class = chosen_label-1
                #relative.scale =
                #relative.prev_tick =
            processed_song.notes.append(relative)
        print()
        return self.reconstruct_song(processed_song)

    def create_extract(self, processed_song, start, length):
        """ preprocessed song > batch
        Args:
            processed_song (RelativeSong): the source song
            start (int): first note index of the extract
            length (int): number of notes in the extract
        Return:
            RelativeBatch.SongExtract
        """
        extract = Relative.RelativeBatch.SongExtract()
        extract.song = processed_song
        extract.begin = start
        extract.end = extract.begin + length
        return extract

    # TODO: How to optimize!! (precompute all values, use sparse arrays?)
    def get_list(self, dataset, name):
        """ See parent class for more details
        Args:
            dataset (list[Song]): the training/testing set
            name (str): indicate the dataset type
        Return:
            list[Batch]: the batches to process
        """
        # Randomly extract subsamples of the songs
        print('Subsampling the songs ({})...'.format(name))
        extracts = []
        sample_subsampling_length = self.args.sample_length+1  # We add 1 because each input has to predict the next output
        for song in dataset:
            len_song = len(song.notes)
            max_start = len_song - sample_subsampling_length
            assert max_start >= 0  # TODO: Error handling
            nb_sample_song = 2*len_song // self.args.sample_length  # The number of subsamples is proportional to the song length (TODO: Could control the factor)
            for _ in range(nb_sample_song):
                extracts.append(self.create_extract(
                    song,
                    random.randrange(max_start + 1),  # Begin. Fixed: +1 so max_start==0 is valid (randrange(0) raises). TODO: Add mode to only start at the beginning of a bar
                    self.args.sample_length  # End
                ))

        # Shuffle the song extracts
        print('Shuffling the dataset...')
        random.shuffle(extracts)

        # Group the samples together to create the batches
        print('Generating batches...')

        def gen_next_samples():
            """ Generator over the mini-batch training samples
            Warning: the last samples will be ignored if the number of batches
            does not match the number of samples
            """
            nb_samples = len(extracts)
            for i in range(nb_samples//self.args.batch_size):
                yield extracts[i*self.args.batch_size:(i+1)*self.args.batch_size]

        batch_set = [Relative.RelativeBatch(e) for e in gen_next_samples()]
        return batch_set

    def get_input_dim(self):
        """ In the case of the relative song, the input dim is the number of
        notes on the scale (12) + 1 for the <next> token
        Return:
            int:
        """
        return Relative.RelativeBatch.get_input_dim()


class PianoRoll(BatchBuilder):
    """ Old piano roll format (legacy code). Won't work as it is
    """
    def __init__(self, args):
        super().__init__(args)

    @staticmethod
    def get_module_id():
        return 'pianoroll'

    def get_list(self, dataset, name=''):  # Fixed: added 'name' (matches parent signature; default keeps old 2-arg calls working)
        # In the original version, the songs were directly converted to piano roll
        # self._convert_song2array()

        batches = []

        # TODO: Create batches (randomly cut each song in some small parts (need
        # to know the total length for that) then create the big matrix
        # (NB_NOTE*sample_length) and turn that into batches). If the process is
        # too long, could save the created batches in a new folder, data/samples
        # or save/model.
        # TODO: Create batches from multiple lengths (buckets). How to change the
        # loss function weights (longer sequences more penalized?)
        # TODO: Optimize memory management

        # First part: Randomly extract subsamples of the songs
        print('Subsampling songs ({})...'.format(name))  # Fixed: 'train_set' was undefined (NameError)
        sample_subsampling_length = self.args.sample_length+1  # We add 1 because each input has to predict the next output
        sub_songs = []
        songs_set = dataset
        for song in songs_set:
            len_song = song.shape[-1]  # The last dimension corresponds to the song duration
            max_start = len_song - sample_subsampling_length
            assert max_start >= 0  # TODO: Error handling
            nb_sample_song = 2*len_song // self.args.sample_length  # The number of subsamples is proportional to the song length
            for _ in range(nb_sample_song):
                start = np.random.randint(max_start + 1)  # Fixed: +1 so max_start==0 is valid. TODO: Add mode to only start at the beginning of a bar
                sub_song = song[:, start:start+sample_subsampling_length]
                sub_songs.append(sub_song)

        # Second part: Shuffle the song extracts
        print("Shuffling the dataset...")
        np.random.shuffle(sub_songs)

        # Third part: Group the samples together to create the batches
        print("Generating batches...")

        def gen_next_samples():
            """ Generator over the mini-batch training samples
            Warning: the last samples will be ignored if the number of batches
            does not match the number of samples
            """
            nb_samples = len(sub_songs)
            for i in range(nb_samples//self.args.batch_size):
                yield sub_songs[i*self.args.batch_size:(i+1)*self.args.batch_size]

        for samples in gen_next_samples():  # TODO: tqdm with persist=False / will this work with generators?
            batch = Batch()

            # samples has shape [batch_size, NB_NOTES, sample_subsampling_length]
            assert len(samples) == self.args.batch_size
            assert samples[0].shape == (music.NB_NOTES, sample_subsampling_length)

            # Define targets and inputs
            for i in range(self.args.sample_length):
                input = -np.ones([len(samples), music.NB_NOTES])
                target = np.zeros([len(samples), music.NB_NOTES])
                for j, sample in enumerate(samples):  # len(samples) == self.args.batch_size
                    # TODO: Could reuse boolean idx computed (from target to next input)
                    input[j, sample[:, i] == 1] = 1.0
                    target[j, sample[:, i+1] == 1] = 1.0

                batch.inputs.append(input)
                batch.targets.append(target)

            batches.append(batch)

        # Use tf.train.batch()??

        # TODO: Save some batches as midi to see if correct

        return batches

    def get_batches_test(self):  # TODO: Move that to BatchBuilder
        """ Return the batches which initiate the RNN when generating

        The initial batches are loaded from a json file containing the first
        notes of the song. The note values are the standard midi ones.
        Here is an example of an initiator file:

        ```
        {"initiator":[
            {"name":"Simple_C4",
             "seq":[
                {"notes":[60]}
            ]},
            {"name":"some_chords",
             "seq":[
                {"notes":[60,64]}
                {"notes":[66,68,71]}
                {"notes":[60,64]}
            ]}
        ]}
        ```

        Return:
            List[Batch], List[str]: The generated batches with the associated names
        """
        assert self.args.batch_size == 1

        batches = []
        names = []

        # NOTE(review): TEST_INIT_FILE is not defined anywhere in this class or
        # its parent — presumably set on the class elsewhere; verify before use
        with open(self.TEST_INIT_FILE) as init_file:
            initiators = json.load(init_file)

        for initiator in initiators['initiator']:
            batch = Batch()

            for seq in initiator['seq']:  # We add a few notes
                new_input = -np.ones([self.args.batch_size, music.NB_NOTES])  # No notes played by default
                for note in seq['notes']:
                    new_input[0, note] = 1.0
                batch.inputs.append(new_input)

            names.append(initiator['name'])
            batches.append(batch)

        return batches, names