Python sklearn.utils.gen_even_slices() Examples

The following are 9 code examples of sklearn.utils.gen_even_slices(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: audio.py    From freesound-classification with Apache License 2.0 5 votes vote down vote up
def shuffle_audio(audio, chunk_length=0.5, sr=None):
    """Return ``audio`` with its chunks rearranged in random order.

    The signal is cut into ``int((audio.size / sr) / chunk_length)`` nearly
    equal pieces with ``gen_even_slices``; the pieces are reordered with
    ``random.shuffle`` and concatenated again.

    Parameters
    ----------
    audio : object exposing ``size`` and slice indexing
        The signal to shuffle (presumably a 1-D numpy array -- confirm
        against callers).
    chunk_length : float (default=0.5)
        Desired chunk length, expressed in units of ``audio.size / sr``.
    sr : number
        Value ``audio.size`` is divided by. It has to be supplied by the
        caller: the default ``None`` cannot be used as a divisor.

    Returns
    -------
    shuffled
        ``audio`` itself (same object) when it yields zero or one chunk,
        otherwise a new array built with ``np.concatenate``.
    """
    duration = audio.size / sr
    chunk_count = int(duration / chunk_length)

    # Nothing to reorder with fewer than two chunks.
    if chunk_count == 0 or chunk_count == 1:
        return audio

    pieces = list(gen_even_slices(audio.size, chunk_count))
    random.shuffle(pieces)

    return np.concatenate([audio[piece] for piece in pieces])
Example #2
Source File: test_utils.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_gen_even_slices():
    """Check coverage and argument validation of ``gen_even_slices``."""
    # Applying the slices one after another must give back every sample.
    some_range = range(10)
    joined_range = []
    for chunk in gen_even_slices(10, 3):
        joined_range.extend(some_range[chunk])
    assert_array_equal(some_range, joined_range)

    # A negative n_packs is rejected, but only once the generator is
    # advanced -- hence ``next`` is the callable under test.
    bad_slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, bad_slices)
Example #3
Source File: pairwise.py    From trajminer with MIT License 5 votes vote down vote up
def pairwise_similarity(X, Y=None, measure=None, n_jobs=1):
    """Computes the similarity between trajectories in X and Y.

    Parameters
    ----------
    X : array-like, shape: (n_trajectories_X, n_points, n_features)
        Input data.
    Y : array-like, shape: (n_trajectories_Y, n_points, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.
    measure : SimilarityMeasure object (default=None)
        The similarity measure to use for computing similarities. See
        :mod:`trajminer.similarity`. Its ``similarity`` method is called
        for every computed pair, so a measure has to be supplied.
    n_jobs : int (default=1)
        The number of parallel jobs. It is also the number of column
        blocks ``Y`` is split into.

    Returns
    -------
    similarities : array
        An array with shape (n_trajectories_X, n_trajectories_Y).
    """
    def compute_slice(X, Y, s):
        # ``Y`` is the column block selected by slice ``s``; the returned
        # matrix has one column per trajectory of that block.
        matrix = np.zeros(shape=(len(X), len(Y)))

        # Only entries whose global column index (s.start + j) is smaller
        # than the row index i are filled, i.e. the strictly lower
        # triangle of the full matrix.
        for i in range(s.start + 1, len(X)):
            for j in range(0, min(len(Y), i - s.start)):
                matrix[i][j] = measure.similarity(X[i], Y[j])
        return matrix

    # Test identity with None rather than truthiness: ``not Y`` raises
    # ValueError for a multi-element numpy array and would treat an empty
    # ``Y`` as "not given".
    upper = Y is not None
    if Y is None:
        Y = X
    func = delayed(compute_slice)

    similarity = Parallel(n_jobs=n_jobs, verbose=0)(
        func(X, Y[s], s) for s in gen_even_slices(len(Y), n_jobs))
    similarity = np.hstack(similarity)

    if not upper:
        # Mirror the lower triangle and put ones on the diagonal.
        similarity += similarity.transpose() + np.identity(len(X))

    # NOTE(review): when ``Y`` is supplied, the entries on and above the
    # diagonal are left at zero -- confirm whether that is intended.
    return similarity
Example #4
Source File: trajectory_data.py    From trajminer with MIT License 5 votes vote down vote up
def _to_csv(self, file, n_jobs):
        """Write the dataset to ``file`` as CSV, one line per trajectory point.

        The header is ``tid,label`` followed by one column per attribute;
        an attribute named ``lat_lon`` is expanded into ``lat`` and ``lon``
        columns. Each data line is ``<tid>,<label>,<point values>``.

        Parameters
        ----------
        file : str
            Path of the file to create (opened in ``'w'`` mode).
        n_jobs : int
            Number of parallel jobs; also the number of slices the
            trajectory IDs are split into.
        """
        # Index of the 'lat_lon' attribute, or -1 when there is none. It is
        # set while the header is built, before build_lines ever runs.
        lat_lon = -1
        tids = self.get_tids()

        def build_lines(s):
            lines = []
            for i in range(s.start, s.stop):
                tid = tids[i]
                label = self.get_label(tid)
                traj = self.get_trajectory(tid)

                for p in traj:
                    if lat_lon > -1:
                        # NOTE(review): this assigns into the point returned
                        # by get_trajectory(); confirm it is not the stored
                        # data being modified in place.
                        p[lat_lon] = str(p[lat_lon][0]) + \
                            ',' + str(p[lat_lon][1])
                    # str(p) minus its first and last character, without
                    # the spaces after commas and without single quotes.
                    fmt = str(p)[1:-1].replace(', ', ',').replace("'", '')
                    lines.append(str(tid) + ',' + str(label) + ',' + fmt)
            return lines

        with open(file, 'w') as out:
            header = 'tid,label'

            for i, attr in enumerate(self.get_attributes()):
                if attr == 'lat_lon':
                    header += ',lat,lon'
                    lat_lon = i
                else:
                    header += ',' + attr

            out.write(header + '\n')
            func = delayed(build_lines)
            chunks = Parallel(n_jobs=n_jobs, verbose=0)(
                func(s) for s in gen_even_slices(len(tids), n_jobs))

            # Flatten in plain Python: np.concatenate raises ValueError when
            # there are no chunks at all (empty dataset), which previously
            # aborted right after the header was written.
            out.write('\n'.join(
                line for chunk in chunks for line in chunk))
            # No explicit close(): the ``with`` block closes the file.
Example #5
Source File: test_utils.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_gen_even_slices():
    """Check coverage and argument validation of ``gen_even_slices``."""
    # Applying the slices one after another must give back every sample.
    some_range = range(10)
    joined_range = []
    for chunk in gen_even_slices(10, 3):
        joined_range.extend(some_range[chunk])
    assert_array_equal(some_range, joined_range)

    # A negative n_packs is rejected, but only once the generator is
    # advanced -- hence ``next`` is the callable under test.
    bad_slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, bad_slices)
Example #6
Source File: generate_sub_final_ensemble.py    From kaggle_carvana_segmentation with MIT License 4 votes vote down vote up
def main():
    """Average the listed prediction folders into one ensemble submission.

    Command-line options: ``-j/--n_jobs`` (number of parallel jobs),
    ``--load`` (call ``load_from_files`` instead of ``average_from_files``)
    and ``--no_save`` (passed on, negated, as ``should_save_masks``).

    Writes ``<output_dir>.txt`` listing the averaged folders with their
    normalised weights, then hands the concatenated per-job results to
    ``create_submission`` together with ``<output_dir>.csv``.

    Every ``print`` call takes a single pre-formatted string so the output
    is the same on Python 2 and Python 3.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--no_save', action='store_true',
                        help='not save probs as pngs?')

    args = parser.parse_args()

    # (folder name inside config.submissions_dir, raw weight)
    probs_dirs = [
        ('test_scratch2', 1.0),
        ('test_vgg11v1_final', 1.0),
        ('albu27.09', 1.0),
        ('ternaus27', 1.0),
    ]
    w_sum = sum(weight for _, weight in probs_dirs)
    print('W_sum= {}'.format(w_sum))
    # Must be a real list: it is iterated several times below, and a
    # Python 3 ``map`` object would be exhausted after the first pass.
    probs_dirs = [
        (Path(join(config.submissions_dir, name)), float(weight) / w_sum)
        for name, weight in probs_dirs
    ]
    print('Weights: {}'.format([weight for _, weight in probs_dirs]))
    output_dir = Path(config.submissions_dir) / ('ens_scratch2(1)_v1-final(1)_al27(1)_te27(1)')

    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for probs_dir, weight in probs_dirs:
            f.write(str(probs_dir) + '; weight={}\n'.format(weight))
            print(str(probs_dir.stem) + '; weight={}\n'.format(weight))
    print('====')
    test_pathes = CARVANA.get_test_paths(is_hq=True)

    print('Reading from {}'.format([str(entry) for entry in probs_dirs]))
    print('output_dir {}'.format(output_dir))

    # One job per even slice of the test paths; only the first job
    # (i == 0) is not quiet.
    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs,
               output_dir=output_dir, is_quiet=(i > 0),
               should_save_masks=not args.no_save)
            for i, s in enumerate(gen_even_slices(len(test_pathes), args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes), args.n_jobs)))

    df = pd.concat(ret, axis=0)

    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #7
Source File: generate_sub_average.py    From kaggle_carvana_segmentation with MIT License 4 votes vote down vote up
def main():
    """Average the per-fold predictions of one network into a submission.

    Command-line options: ``-j/--n_jobs`` (number of parallel jobs),
    ``--load`` (call ``load_from_files`` instead of ``average_from_files``)
    and ``--net_name`` (``scratch`` or ``vgg11v1``).

    For each of the 7 folds, the folder with the highest ``_epoch<N>``
    suffix matching the network's pattern is selected. The selected
    folders are listed in ``<output_dir>.txt`` and the concatenated
    per-job results go to ``create_submission`` with ``<output_dir>.csv``.

    Every ``print`` call takes a single argument so the output is the same
    on Python 2 and Python 3.

    Raises
    ------
    ValueError
        If ``--net_name`` is neither ``vgg11v1`` nor ``scratch`` (this
        includes the option being omitted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', '--n_jobs', type=int, default=1, metavar='N',
                        help='number of parallel jobs')
    parser.add_argument('--load', action='store_true',
                        help='load pregenerated probs from folder?')
    parser.add_argument('--net_name', choices=['scratch', 'vgg11v1'])
    args = parser.parse_args()
    print('config.submissions_dir {}'.format(config.submissions_dir))

    # Per network: glob pattern of the fold folders ({} is the fold id)
    # and the name of the averaged output.
    if args.net_name == 'vgg11v1':
        fold_pattern = 'test_probs_vgg11v1_s1993_im1024_gacc1_aug1_v2fold{}.7_noreg_epoch*'
        output_name = 'test_vgg11v1_final'
    elif args.net_name == 'scratch':
        fold_pattern = 'test_probs_scratch_s1993_im1024_aug1_fold{}.7_epoch*'
        output_name = 'test_scratch2'
    else:
        raise ValueError('Unknown net_name {}'.format(args.net_name))

    probs_dirs = list()
    for fold_id in range(7):
        dirs = glob.glob(join(config.submissions_dir,
                              fold_pattern.format(fold_id)))
        epochs = [int(d.rsplit('_epoch', 1)[1]) for d in dirs]
        # Highest epoch wins; an IndexError here means no folder matched.
        last_epoch_dir = sorted(zip(epochs, dirs))[-1][1]
        probs_dirs.append(last_epoch_dir)
    print([os.path.basename(d) for d in probs_dirs])
    output_dir = Path(config.submissions_dir) / (output_name)

    # A real list (not a Python 3 ``map`` iterator): it is iterated
    # again below after the .txt file is written.
    probs_dirs = [Path(d) for d in probs_dirs]
    with open(str(output_dir) + '.txt', mode='w') as f:
        f.write('Following models were averaged:\n')
        for probs_dir in probs_dirs:
            f.write(str(probs_dir) + '\n')
    test_pathes = CARVANA.get_test_paths(is_hq=True)

    print('Reading from {}'.format([str(d) for d in probs_dirs]))
    print('output_dir {}'.format(output_dir))

    # One job per even slice of the test paths; only the first job
    # (i == 0) is not quiet.
    if not args.load:
        fd = delayed(average_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], probs_dirs=probs_dirs, output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes), args.n_jobs)))
    else:
        fd = delayed(load_from_files)
        ret = Parallel(n_jobs=args.n_jobs, verbose=0)(
            fd(test_pathes[s], output_dir=output_dir, is_quiet=(i > 0))
            for i, s in enumerate(gen_even_slices(len(test_pathes), args.n_jobs)))

    df = pd.concat(ret, axis=0)

    output_path = str(output_dir) + '.csv'
    create_submission(df, str(output_path))
Example #8
Source File: segmentation.py    From trajminer with MIT License 4 votes vote down vote up
def fit_transform(self, X):
        """Fit and segment trajectories.

        Each trajectory is walked point by point. For every pair of
        consecutive points, the callable in ``self.thresholds`` for each
        attribute in ``self.attributes`` is evaluated on that attribute's
        values; a new segment starts when any of them (``self.mode ==
        'any'``) or all of them (any other mode) return a truthy value.

        Parameters
        ----------
        X : :class:`trajminer.TrajectoryData`
            Input dataset to segment.

        Returns
        -------
        X_out : :class:`trajminer.TrajectoryData`
            Segmented dataset.
        """
        tids = X.get_tids()

        def segment(X, s):
            def is_boundary(prev_point, point):
                votes = [self.thresholds[attr](prev_point[k], point[k])
                         for k, attr in enumerate(self.attributes)]
                if self.mode == 'any':
                    return np.any(votes)
                return np.all(votes)

            per_trajectory = []

            for t in range(s.start, s.stop):
                traj = X.get_trajectory(tids[t])
                pieces = []
                # The first point always opens the first segment.
                current = [traj[0]]

                for k in range(1, len(traj)):
                    if is_boundary(traj[k - 1], traj[k]):
                        pieces.append(current)
                        current = [traj[k]]
                    else:
                        current.append(traj[k])

                # Close the segment that was still open at the end.
                pieces.append(current)
                per_trajectory.append(pieces)

            return per_trajectory

        job = delayed(segment)
        segments = Parallel(n_jobs=self.n_jobs, verbose=0)(
            job(X, s) for s in gen_even_slices(len(X.get_trajectories()),
                                               self.n_jobs))
        labels = X.get_labels()
        # NOTE(review): Parallel returns one list per job; squeeze only
        # drops length-1 axes, so confirm the indexing below is still right
        # when n_jobs > 1.
        segments = np.squeeze(segments)
        new_labels = None

        if labels is not None:
            new_labels = []

            # Repeat each label once per entry of segments[idx].
            for idx, label in enumerate(labels):
                new_labels.extend(np.full(len(segments[idx]), label))

        segments = np.squeeze(segments)
        new_tids = np.r_[1:len(segments) + 1]
        return TrajectoryData(attributes=X.get_attributes(),
                              data=segments,
                              tids=new_tids,
                              labels=new_labels)
Example #9
Source File: filter.py    From trajminer with MIT License 4 votes vote down vote up
def filter_duplicate_points(data, criterium, remove_first=True, inplace=True,
                            n_jobs=1):
    """Removes duplicates of trajectory points according to the given criteria.

    Consecutive points of every trajectory are compared with ``criterium``.
    When a pair is reported as duplicate, one of the two points is deleted
    and the comparison is repeated at the same position, so runs of
    duplicates collapse to a single point.

    Parameters
    ----------
    data : :class:`trajminer.TrajectoryData`
        The dataset to be filtered.
    criterium : callable
        A callable that takes two trajectory points and decides whether or
        not they are duplicates. If `True`, then one of the points is
        removed from the dataset (the first or the last point, depending
        on the `remove_first` parameter).
    remove_first : bool (default=True)
        If `True`, then whenever duplicates are found, the first point is
        removed. Otherwise, the last one is removed from the dataset.
    inplace : bool (default=True)
        If `True` modifies the current object, otherwise returns a new
        object.
    n_jobs : int (default=1)
        The number of parallel jobs.

    Returns
    -------
    dataset : :class:`trajminer.TrajectoryData`
        The filtered dataset. If `inplace=True`, then returns the modified
        current object.
    """
    tids = data.get_tids()

    def filter_slice(s):
        filtered = []

        for t in range(s.start, s.stop):
            # Work on a copy of the trajectory returned by the dataset.
            points = np.copy(data.get_trajectory(tids[t]))
            pos = 1

            while pos < len(points):
                if criterium(points[pos - 1], points[pos]):
                    # Drop one point of the pair and stay at ``pos`` so the
                    # new neighbours are compared next.
                    doomed = pos - 1 if remove_first else pos
                    points = np.delete(points, doomed, axis=0)
                else:
                    pos += 1
            filtered.append(points)

        return filtered

    job = delayed(filter_slice)
    ret = Parallel(n_jobs=n_jobs, verbose=0)(
        job(s) for s in gen_even_slices(len(tids), n_jobs))

    n_data = np.concatenate(ret)

    if not inplace:
        return TrajectoryData(data.get_attributes(), n_data, data.get_tids(),
                              data.get_labels())

    data._update(data.get_attributes(), n_data, data.get_tids(),
                 data.get_labels())
    return data