Python config.sample_rate() Examples

The following are 8 code examples of config.sample_rate(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module config, or try the search function.
Example #1
Source File: features.py    From dcase2019_task2 with MIT License 6 votes vote down vote up
def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, fmax):
    """Log mel feature extractor.

    Args:
      sample_rate: int, sampling rate of the audio to be transformed.
      window_size: int, STFT window length in samples.
      hop_size: int, hop between consecutive frames in samples.
      mel_bins: int, number of mel bands.
      fmin: int, minimum frequency of mel filter banks.
      fmax: int, maximum frequency of mel filter banks.
    """
    self.window_size = window_size
    self.hop_size = hop_size

    # Hann window applied to each STFT frame.
    self.window_func = np.hanning(window_size)

    # Mel filter bank, transposed to shape (n_fft // 2 + 1, mel_bins) so a
    # spectrogram of shape (n_time, n_fft // 2 + 1) can be matrix-multiplied
    # against it directly.
    self.melW = librosa.filters.mel(
        sr=sample_rate,
        n_fft=window_size,
        n_mels=mel_bins,
        fmin=fmin,
        fmax=fmax).T
Example #2
Source File: prepare_data.py    From music_transcription_MAPS with MIT License 5 votes vote down vote up
def write_audio(path, audio, sample_rate):
    """Save an audio sequence to a .wav file.

    Args:
      path: string, destination path of the .wav file.
      audio: ndarray, audio sequence to write out.
      sample_rate: int, sample rate to write out.

    Returns:
      None.
    """
    soundfile.write(path, audio, samplerate=sample_rate)
Example #3
Source File: prepare_data.py    From music_transcription_MAPS with MIT License 5 votes vote down vote up
def logmel(audio):
    """Calculate log mel spectrogram of an audio sequence.

    Args:
      audio: 1darray, audio sequence.

    Returns:
      x: ndarray float32, log mel spectrogram, shape (n_time, n_mels).
    """
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate

    ham_win = np.hamming(n_window)

    # Magnitude spectrogram. NOTE: the original called
    # signal.spectral.spectrogram; `scipy.signal.spectral` is a private
    # module that was removed in SciPy 1.8 — the public entry point
    # scipy.signal.spectrogram is the supported, equivalent call.
    [f, t, x] = signal.spectrogram(
                    audio,
                    window=ham_win,
                    nperseg=n_window,
                    noverlap=n_overlap,
                    detrend=False,
                    return_onesided=True,
                    mode='magnitude')
    x = x.T  # (n_time, n_freq)

    # Build the mel filter bank lazily and cache it at module level; the cfg
    # values do not change between calls, so it only needs computing once.
    global melW
    if globals().get('melW') is None:
        melW = librosa.filters.mel(sr=fs,
                                   n_fft=n_window,
                                   n_mels=229,
                                   fmin=0,
                                   fmax=fs / 2.)

    x = np.dot(x, melW.T)
    # Small floor avoids log(0) on silent frames.
    x = np.log(x + 1e-8)
    x = x.astype(np.float32)
    return x
Example #4
Source File: utils.py    From Speech-Transformer with MIT License 5 votes vote down vote up
def extract_feature(input_file, feature='fbank', dim=80, cmvn=True, delta=False, delta_delta=False,
                    window_size=25, stride=10, save_feature=None):
    """Extract an acoustic feature matrix from an audio file.

    Args:
      input_file: string, path of the audio file to load.
      feature: 'fbank' (log mel filterbank) or 'mfcc'.
      dim: int, number of filterbank bins / MFCC coefficients.
      cmvn: bool, apply per-dimension cepstral mean-variance normalization.
      delta: bool, append first-order deltas.
      delta_delta: bool, append second-order deltas.
      window_size: int, analysis window in milliseconds.
      stride: int, frame shift in milliseconds.
      save_feature: optional string; if given, save the feature as .npy and
        return its number of frames instead of the array.

    Returns:
      ndarray float32 of shape (n_frames, n_dims), or int frame count when
      save_feature is given.

    Raises:
      ValueError: if `feature` is not 'fbank' or 'mfcc'.
    """
    # NOTE(review): `sample_rate` is a module-level constant defined
    # elsewhere in this file — confirm it is in scope at call time.
    y, sr = librosa.load(input_file, sr=sample_rate)
    yt, _ = librosa.effects.trim(y, top_db=20)
    yt = normalize(yt)

    # Convert ms to samples.
    ws = int(sr * 0.001 * window_size)
    st = int(sr * 0.001 * stride)

    if feature == 'fbank':  # log-scaled mel filterbank
        feat = librosa.feature.melspectrogram(y=yt, sr=sr, n_mels=dim,
                                              n_fft=ws, hop_length=st)
        feat = np.log(feat + 1e-6)
    elif feature == 'mfcc':
        feat = librosa.feature.mfcc(y=yt, sr=sr, n_mfcc=dim, n_mels=26,
                                    n_fft=ws, hop_length=st)
        # Replace C0 with frame energy. `librosa.feature.rmse` was renamed
        # to `librosa.feature.rms` (the old alias was removed in librosa
        # 0.8), and the audio must be passed as the keyword `y=`.
        feat[0] = librosa.feature.rms(y=yt, hop_length=st, frame_length=ws)
    else:
        raise ValueError('Unsupported Acoustic Feature: ' + feature)

    # Stack deltas (both computed from the static features) along dim axis.
    feat = [feat]
    if delta:
        feat.append(librosa.feature.delta(feat[0]))
    if delta_delta:
        feat.append(librosa.feature.delta(feat[0], order=2))
    feat = np.concatenate(feat, axis=0)

    if cmvn:
        # Per-dimension mean/variance normalization over time.
        feat = (feat - feat.mean(axis=1)[:, np.newaxis]) / (feat.std(axis=1) + 1e-16)[:, np.newaxis]

    if save_feature is not None:
        tmp = np.swapaxes(feat, 0, 1).astype('float32')
        np.save(save_feature, tmp)
        return len(tmp)
    else:
        return np.swapaxes(feat, 0, 1).astype('float32')
Example #5
Source File: features.py    From dcase2018_task1 with MIT License 5 votes vote down vote up
def __init__(self, sample_rate, window_size, overlap, mel_bins):
    """Feature extractor holding the STFT window and mel filter bank.

    Args:
      sample_rate: int, sampling rate of the audio to be transformed.
      window_size: int, STFT window length in samples.
      overlap: int, overlap between adjacent frames in samples.
      mel_bins: int, number of mel bands.
    """
    self.window_size = window_size
    self.overlap = overlap

    # Hamming window applied to each frame.
    self.ham_win = np.hamming(window_size)

    # Mel filter bank, transposed so spectrogram frames can be multiplied
    # against it directly; band edges span 50 Hz to Nyquist.
    self.melW = librosa.filters.mel(sr=sample_rate,
                                    n_fft=window_size,
                                    n_mels=mel_bins,
                                    fmin=50.,
                                    fmax=sample_rate // 2).T
Example #6
Source File: features.py    From dcase2018_task1 with MIT License 5 votes vote down vote up
def calculate_logmel(audio_path, sample_rate, feature_extractor):
    """Read an audio file and compute its log-mel feature.

    Args:
      audio_path: string, path of the audio file.
      sample_rate: int, target sample rate the audio is resampled to.
      feature_extractor: object exposing a transform(audio) method.

    Returns:
      Feature array produced by feature_extractor.transform.
    """
    # Read audio at the target sample rate.
    (audio, fs) = read_audio(audio_path, target_fs=sample_rate)

    # We do not divide by the maximum value of the audio here because we
    # assume the low energy of an audio may also contain information of a
    # scene.
    return feature_extractor.transform(audio)
Example #7
Source File: prepare_data.py    From dcase2017_task4_cvssp with MIT License 5 votes vote down vote up
def write_audio(path, audio, sample_rate):
    """Write an audio sequence out as a .wav file.

    Args:
      path: string, destination path of the .wav file.
      audio: ndarray, audio sequence to write out.
      sample_rate: int, sample rate to write out.

    Returns:
      None.
    """
    soundfile.write(file=path, data=audio, samplerate=sample_rate)

# Create an empty folder 
Example #8
Source File: prepare_data.py    From music_transcription_MAPS with MIT License 4 votes vote down vote up
def calculate_features(args):
    """Calculate and write out features & ground truth notes of all songs in
    the MUS directory of all pianos.

    Args:
      args: namespace with attributes dataset_dir, workspace and feat_type
        ('spectrogram' or 'logmel').

    Raises:
      ValueError: if args.feat_type is not a recognized feature type.
    """
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    feat_type = args.feat_type
    fs = cfg.sample_rate
    tr_pianos = cfg.tr_pianos
    te_pianos = cfg.te_pianos
    pitch_bgn = cfg.pitch_bgn
    pitch_fin = cfg.pitch_fin

    out_dir = os.path.join(workspace, "features", feat_type)
    create_folder(out_dir)

    # Calculate features for every training & testing piano.
    cnt = 0
    for piano in tr_pianos + te_pianos:
        audio_dir = os.path.join(dataset_dir, piano, "MUS")
        wav_names = [na for na in os.listdir(audio_dir) if na.endswith('.wav')]

        for wav_na in wav_names:
            # Read audio, resampled to the configured sample rate.
            bare_na = os.path.splitext(wav_na)[0]
            wav_path = os.path.join(audio_dir, wav_na)
            (audio, _) = read_audio(wav_path, target_fs=fs)

            # Calculate feature.
            if feat_type == "spectrogram":
                x = spectrogram(audio)
            elif feat_type == "logmel":
                x = logmel(audio)
            else:
                # Informative error instead of the original bare
                # Exception("Error!"); ValueError still subclasses Exception.
                raise ValueError("Unknown feat_type: %s" % feat_type)

            # Read piano roll from txt file.
            (n_time, n_freq) = x.shape
            txt_path = os.path.join(audio_dir, "%s.txt" % bare_na)
            roll = txt_to_midi_roll(txt_path, max_fr_len=n_time)    # (n_time, 128)
            y = roll[:, pitch_bgn : pitch_fin]      # (n_time, 88)

            # Write out data. Use a context manager so the file handle is
            # closed even if pickling fails (the original passed an open()
            # call directly to cPickle.dump and leaked the handle).
            data = [x, y]
            out_path = os.path.join(out_dir, "%s.p" % bare_na)
            print(cnt, out_path, x.shape, y.shape)
            with open(out_path, 'wb') as f:
                cPickle.dump(data, f, protocol=cPickle.HIGHEST_PROTOCOL)
            cnt += 1
        
### Pack features.