Python sklearn.utils.shuffle() Examples

The following are 30 code examples showing how to use sklearn.utils.shuffle(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.

Example 1
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: iterators.py    License: Apache License 2.0 6 votes vote down vote up
def reset(self):
        """Rewind the iterator and reshuffle its buckets.

        ``self.curr_idx`` goes back to 0, ``self.idx`` is shuffled in place,
        the four per-bucket sequences are passed together to one ``shuffle``
        call per bucket, and the ``nd*`` lists are rebuilt from the result.
        """
        self.curr_idx = 0
        random.shuffle(self.idx)

        # One shuffle call per bucket covers all four parallel sequences.
        for bucket, _ in enumerate(self.sentences):
            shuffled = shuffle(self.indices[bucket],
                               self.sentences[bucket],
                               self.characters[bucket],
                               self.label[bucket])
            (self.indices[bucket],
             self.sentences[bucket],
             self.characters[bucket],
             self.label[bucket]) = shuffled

        self.ndindex, self.ndsent, self.ndchar, self.ndlabel = [], [], [], []

        # Convert every bucket to arrays of the iterator's dtype.
        for bucket, _ in enumerate(self.sentences):
            kind = self.dtype
            self.ndindex.append(ndarray.array(self.indices[bucket], dtype=kind))
            self.ndsent.append(ndarray.array(self.sentences[bucket], dtype=kind))
            self.ndchar.append(ndarray.array(self.characters[bucket], dtype=kind))
            self.ndlabel.append(ndarray.array(self.label[bucket], dtype=kind))
Example 2
Project: pyod   Author: yzhao062   File: test_combination.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_aom_static_norepeat(self):
        """Compare static, non-bootstrapped ``aom`` with a manual computation.

        The manual reference takes the six column indices shuffled with seed
        42, splits them into three consecutive pairs, takes the row-wise max
        inside each pair and then the row-wise mean across the three pairs.
        """
        score = aom(self.scores, 3, method='static',
                    bootstrap_estimators=False,
                    random_state=42)
        assert_equal(score.shape, (4,))

        shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
        manual_scores = np.zeros([4, 3])
        for col in range(3):
            pair = shuffled_list[2 * col:2 * col + 2]
            manual_scores[:, col] = np.max(self.scores[:, pair], axis=1)

        assert_array_equal(score, np.mean(manual_scores, axis=1))
Example 3
Project: pyod   Author: yzhao062   File: test_combination.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_moa_static_norepeat(self):
        """Compare static, non-bootstrapped ``moa`` with a manual computation.

        The manual reference takes the six column indices shuffled with seed
        42, splits them into three consecutive pairs, takes the row-wise mean
        inside each pair and then the row-wise max across the three pairs.
        """
        score = moa(self.scores, 3, method='static',
                    bootstrap_estimators=False, random_state=42)
        assert_equal(score.shape, (4,))

        shuffled_list = shuffle(list(range(0, 6, 1)), random_state=42)
        manual_scores = np.zeros([4, 3])
        for col in range(3):
            pair = shuffled_list[2 * col:2 * col + 2]
            manual_scores[:, col] = np.mean(self.scores[:, pair], axis=1)

        assert_array_equal(score, np.max(manual_scores, axis=1))
Example 4
Project: redshells   Author: m3dev   File: lda_model.py    License: MIT License 6 votes vote down vote up
def fit(self,
            texts: List[List[str]],
            adjust_passes=True,
            test_size=0.1,
            random_state=123,
            dictionary: Optional[gensim.corpora.Dictionary] = None) -> None:
        """Train the LDA model on ``texts`` and record held-out log perplexity.

        Args:
            texts: Tokenized documents.
            adjust_passes: When true, the number of passes is derived from the
                corpus size (``round(100000 / (len(corpus) + 1))`` clipped to
                ``[1, 20]``); otherwise a single pass is used.
            test_size: Forwarded to ``train_test_split``.
            random_state: Seed used for both the initial shuffle and the
                train/test split.
            dictionary: Optional pre-built dictionary; when falsy, one is
                built from ``texts`` via ``self._make_dictionary``.

        Side effects:
            Sets ``self._lda`` and ``self.log_perplexity`` and logs the latter.
        """
        # Seed the shuffle too; previously only the split honoured
        # ``random_state``, so the train/test partition was not reproducible.
        texts = shuffle(texts, random_state=random_state)
        dictionary = dictionary or self._make_dictionary(texts)
        corpus = self._make_corpus(texts=texts, dictionary=dictionary)
        train, test = train_test_split(corpus, test_size=test_size, random_state=random_state)
        passes = np.clip(int(round(100000 / (len(corpus) + 1))), 1, 20) if adjust_passes else 1
        self._lda = gensim.models.LdaModel(
            alpha='auto',
            corpus=train,
            num_topics=self.n_topics,
            id2word=dictionary,
            iterations=self.iterations,
            passes=passes)
        self.log_perplexity = self._lda.log_perplexity(test)
        logger.info('log_perplexity=%s', self.log_perplexity)
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_weight_boosting.py    License: MIT License 6 votes vote down vote up
def test_importances():
    """Check AdaBoost feature importances for both boosting algorithms.

    The first three importances must each be at least as large as every one
    of the remaining seven.
    """
    n_features, n_informative = 10, 3
    # shuffle=False: per the make_classification docs the informative
    # features then come first, which the slicing below relies on.
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=n_features,
                                        n_informative=n_informative,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=1)

    for algorithm in ('SAMME', 'SAMME.R'):
        model = AdaBoostClassifier(algorithm=algorithm)
        model.fit(X, y)
        scores = model.feature_importances_

        assert_equal(scores.shape[0], n_features)
        leading = scores[:n_informative, np.newaxis]
        trailing = scores[n_informative:]
        assert_equal((leading >= trailing).all(), True)
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_learning_curve_batch_and_incremental_learning_are_equal():
    """Incremental and batch learning curves must agree.

    Runs ``learning_curve`` twice on the same estimator and data, once with
    ``exploit_incremental_learning=True`` and once with ``False``, then
    compares the train sizes exactly and the mean scores approximately.
    """
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    sizes = np.linspace(0.2, 1.0, 5)
    clf = PassiveAggressiveClassifier(max_iter=1, tol=None, shuffle=False)

    incremental = learning_curve(clf, X, y, train_sizes=sizes, cv=3,
                                 exploit_incremental_learning=True)
    batch = learning_curve(clf, X, y, cv=3, train_sizes=sizes,
                           exploit_incremental_learning=False)
    train_sizes_inc, train_scores_inc, test_scores_inc = incremental
    train_sizes_batch, train_scores_batch, test_scores_batch = batch

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    for inc_scores, batch_scores in ((train_scores_inc, train_scores_batch),
                                     (test_scores_inc, test_scores_batch)):
        assert_array_almost_equal(inc_scores.mean(axis=1),
                                  batch_scores.mean(axis=1))
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def check_cross_val_predict_multiclass(est, X, y, method):
    """Helper for tests of cross_val_predict with multiclass classification.

    Builds the expected prediction matrix fold by fold with a 3-fold,
    unshuffled ``KFold`` and compares it with ``cross_val_predict`` for
    several encodings of ``y``.
    """
    cv = KFold(n_splits=3, shuffle=False)

    # Columns of classes missing from a training fold keep this fill value.
    lowest = np.finfo(np.float64).min
    fill_value = {'decision_function': lowest,
                  'predict_log_proba': lowest,
                  'predict_proba': 0}[method]
    n_classes = len(set(y))
    expected_predictions = np.full((len(X), n_classes), fill_value,
                                   dtype=np.float64)

    y_enc = np.unique(y, return_inverse=True)[1]
    for train, test in cv.split(X, y_enc):
        est = clone(est).fit(X[train], y_enc[train])
        seen_classes = np.unique(y_enc[train])
        target_cells = np.ix_(test, seen_classes)
        expected_predictions[target_cells] = getattr(est, method)(X[test])

    # Same expectation for shifted and string-typed versions of y.
    for target in (y, y + 1, y - 2, y.astype('str')):
        actual = cross_val_predict(est, X, target, method=method, cv=cv)
        assert_allclose(actual, expected_predictions)
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_data.py    License: MIT License 6 votes vote down vote up
def test_power_transformer_nans(method):
    """Lambda estimation must ignore NaNs and transform() must keep them.

    Fits on NaN-free data, then on the same data with an equal-sized block
    of NaNs appended and shuffled in, and checks that the first lambda
    matches to 5 decimals and that NaN positions survive ``transform``.
    """
    clean = np.abs(X_1col)
    pt = PowerTransformer(method=method)
    pt.fit(clean)
    lmbda_no_nans = pt.lambdas_[0]

    padded = np.concatenate([clean, np.full_like(clean, np.nan)])
    with_nans = shuffle(padded, random_state=0)

    pt.fit(with_nans)
    lmbda_nans = pt.lambdas_[0]
    assert_almost_equal(lmbda_no_nans, lmbda_nans, decimal=5)

    transformed = pt.transform(with_nans)
    assert_array_equal(np.isnan(transformed), np.isnan(with_nans))
Example 9
Project: cv-tricks.com   Author: legolas123   File: dataset.py    License: MIT License 6 votes vote down vote up
def read_train_sets(train_path, image_size, classes, validation_size):
  """Load, shuffle and split the training data into train/validation sets.

  A float ``validation_size`` is treated as a fraction of the number of
  images (``images.shape[0]``); any other value is used directly as a
  slice bound. The first ``validation_size`` shuffled samples become the
  validation set and the rest the training set.

  Returns an object with ``train`` and ``valid`` attributes, each a
  ``DataSet``.
  """
  class DataSets(object):
    pass

  loaded = load_train(train_path, image_size, classes)
  images, labels, img_names, cls = loaded
  images, labels, img_names, cls = shuffle(images, labels, img_names, cls)

  if isinstance(validation_size, float):
    validation_size = int(validation_size * images.shape[0])

  head = slice(None, validation_size)
  tail = slice(validation_size, None)

  data_sets = DataSets()
  data_sets.train = DataSet(images[tail], labels[tail], img_names[tail], cls[tail])
  data_sets.valid = DataSet(images[head], labels[head], img_names[head], cls[head])
  return data_sets
Example 10
Project: Neural-Network-Programming-with-TensorFlow   Author: PacktPublishing   File: dataset.py    License: MIT License 6 votes vote down vote up
def next_batch(self, batch_size):
    """Return the next `batch_size` examples from this data set.

    Returns a 4-tuple of slices of ``_images``, ``_labels``, ``_ids`` and
    ``_cls``. When the requested window would run past ``_num_examples``
    the epoch counter is incremented and the window restarts at 0; the
    data is not reshuffled at the epoch boundary.
    """
    begin = self._index_in_epoch
    self._index_in_epoch = begin + batch_size

    if self._index_in_epoch > self._num_examples:
      # Epoch exhausted: count it and restart from the first sample.
      self._epochs_completed += 1
      begin = 0
      self._index_in_epoch = batch_size
      assert batch_size <= self._num_examples

    window = slice(begin, self._index_in_epoch)
    return (self._images[window], self._labels[window],
            self._ids[window], self._cls[window])
Example 11
Project: Neural-Network-Programming-with-TensorFlow   Author: PacktPublishing   File: dataset.py    License: MIT License 6 votes vote down vote up
def read_train_sets(train_path, image_size, classes, validation_size=0):
  """Load, shuffle and split the training data into train/validation sets.

  A float ``validation_size`` is treated as a fraction of the number of
  images (``images.shape[0]``); any other value is used directly as a
  slice bound. The first ``validation_size`` shuffled samples become the
  validation set and the rest the training set.

  Returns an object with ``train`` and ``valid`` attributes, each a
  ``DataSet``.
  """
  class DataSets(object):
    pass

  def split(sequence):
    # (validation part, training part) of one shuffled sequence
    return sequence[:validation_size], sequence[validation_size:]

  images, labels, ids, cls = load_train(train_path, image_size, classes)
  images, labels, ids, cls = shuffle(images, labels, ids, cls)

  if isinstance(validation_size, float):
    validation_size = int(validation_size * images.shape[0])

  valid_images, train_images = split(images)
  valid_labels, train_labels = split(labels)
  valid_ids, train_ids = split(ids)
  valid_cls, train_cls = split(cls)

  data_sets = DataSets()
  data_sets.train = DataSet(train_images, train_labels, train_ids, train_cls)
  data_sets.valid = DataSet(valid_images, valid_labels, valid_ids, valid_cls)
  return data_sets
Example 12
Project: adversarial-autoencoder   Author: hjweide   File: utils.py    License: MIT License 6 votes vote down vote up
def load_mnist():
    """Read the four MNIST idx files under ``mnist/`` and return shuffled data.

    Image files skip the first 16 bytes and label files the first 8 bytes
    before reshaping. Images are returned as float32 rows of 784 values
    divided by 255; labels are uint8 vectors.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.
    """
    def read_bytes(path):
        # Whole file as a flat uint8 array.
        with open(path, 'rb') as f:
            return np.fromfile(file=f, dtype=np.uint8)

    raw = read_bytes('mnist/train-images-idx3-ubyte')
    X_train = raw[16:].reshape(60000, 28 * 28).astype(np.float32)
    raw = read_bytes('mnist/train-labels-idx1-ubyte')
    y_train = raw[8:].reshape(60000).astype(np.uint8)

    raw = read_bytes('mnist/t10k-images-idx3-ubyte')
    X_test = raw[16:].reshape(10000, 28 * 28).astype(np.float32)
    raw = read_bytes('mnist/t10k-labels-idx1-ubyte')
    y_test = raw[8:].reshape(10000).astype(np.uint8)

    X_train, y_train = shuffle(X_train, y_train)
    X_test, y_test = shuffle(X_test, y_test)

    # Scale pixel values in place.
    for pixels in (X_train, X_test):
        pixels /= 255.

    return X_train, y_train, X_test, y_test
Example 13
Project: training_results_v0.6   Author: mlperf   File: iterators.py    License: Apache License 2.0 6 votes vote down vote up
def reset(self):
        """Reset the iterator to the start and reshuffle every bucket.

        Sets ``self.curr_idx`` to 0, shuffles ``self.idx`` in place, passes
        the four per-bucket sequences to a single ``shuffle`` call per
        bucket, and rebuilds ``ndindex``/``ndsent``/``ndchar``/``ndlabel``.
        """
        self.curr_idx = 0
        random.shuffle(self.idx)

        # Reorder the four parallel sequences of each bucket in one call.
        for pos, _ in enumerate(self.sentences):
            idx_b, sent_b, char_b, label_b = shuffle(self.indices[pos],
                                                     self.sentences[pos],
                                                     self.characters[pos],
                                                     self.label[pos])
            self.indices[pos] = idx_b
            self.sentences[pos] = sent_b
            self.characters[pos] = char_b
            self.label[pos] = label_b

        self.ndindex = []
        self.ndsent = []
        self.ndchar = []
        self.ndlabel = []

        # Materialise each bucket as arrays of self.dtype.
        for pos, _ in enumerate(self.sentences):
            for target, source in ((self.ndindex, self.indices),
                                   (self.ndsent, self.sentences),
                                   (self.ndchar, self.characters),
                                   (self.ndlabel, self.label)):
                target.append(ndarray.array(source[pos], dtype=self.dtype))
Example 14
Project: Kitchen2D   Author: zi-w   File: helper.py    License: MIT License 6 votes vote down vote up
def gen_biased_data(func, pos_ratio, N):
    '''
    Generate N data points on function func, with a fraction pos_ratio of
    the data points having a positive label (func(x) > 0).

    Points are drawn uniformly between func.x_range[0] and func.x_range[1]
    and rejected once the quota for their class is full, so this loops
    until both quotas are met (it never terminates if func cannot produce
    the required class).

    Returns (inputs, outputs): all but the last column, and the last
    column, of the shuffled stacked samples.
    '''
    # Quotas are loop-invariant; compute them once.
    n_pos = pos_ratio * N
    n_neg = N - pos_ratio * N
    pos = []
    neg = []
    while len(pos) < n_pos or len(neg) < n_neg:
        x = np.random.uniform(func.x_range[0], func.x_range[1])
        y = func(x)
        if y > 0:
            if len(pos) < n_pos:
                pos.append(np.hstack((x, y)))
        elif len(neg) < n_neg:
            neg.append(np.hstack((x, y)))
    # Stack the rows of both classes in one go: stacking the two lists as
    # separate blocks fails when one of them is empty (pos_ratio 0 or 1).
    xy = np.vstack(pos + neg)
    xy = shuffle(xy)
    return xy[:, :-1], xy[:, -1]
Example 15
Project: FATE   Author: FederatedAI   File: functional_autoencoder_test.py    License: Apache License 2.0 6 votes vote down vote up
def getKaggleMNIST(file_path):
    """Load the Kaggle MNIST CSV and return a shuffled train/test split.

    Column 0 holds the labels and the remaining columns the pixel values
    (0 .. 255), which are scaled by 1/255. After shuffling, the last 1000
    rows are held out as the test set.
    (The original note gives the CSV size as (42000, 1, 28, 28) -- TODO
    confirm against the actual file.)

    Returns:
        ``(Xtrain, Ytrain, Xtest, Ytest)`` with int32 label vectors.
    """
    train = pd.read_csv(file_path)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on
    # both old and new pandas versions.
    train = train.values
    train = shuffle(train)

    # Divide by a float so the scaling is true division on Python 2 too.
    Xtrain = train[:-1000, 1:] / 255.0
    Ytrain = train[:-1000, 0].astype(np.int32)
    Xtest = train[-1000:, 1:] / 255.0
    Ytest = train[-1000:, 0].astype(np.int32)

    return Xtrain, Ytrain, Xtest, Ytest
Example 16
Project: BirdCLEF-Baseline   Author: kahst   File: spec.py    License: MIT License 6 votes vote down vote up
def getSpecs(path):
    """Extract spectrograms from ``path`` with their signal-to-noise values.

    Every spectrogram yielded by ``audio.specsFromFile`` (configured from
    ``cfg``) is paired with ``audio.signal2noise`` of that spectrogram.
    Both lists are shuffled together with ``random_state=RANDOM``.

    Returns:
        ``(specs, noise)`` after shuffling.
    """
    extraction = dict(rate=cfg.SAMPLE_RATE,
                      seconds=cfg.SPEC_LENGTH,
                      overlap=cfg.SPEC_OVERLAP,
                      minlen=cfg.SPEC_MINLEN,
                      fmin=cfg.SPEC_FMIN,
                      fmax=cfg.SPEC_FMAX,
                      spec_type=cfg.SPEC_TYPE,
                      shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    specs, noise = [], []
    for spec in audio.specsFromFile(path, **extraction):
        ratio = audio.signal2noise(spec)
        specs.append(spec)
        noise.append(ratio)

    # Shuffled so that later selection is random.
    return shuffle(specs, noise, random_state=RANDOM)
Example 17
Project: qiskit-aqua   Author: Qiskit   File: vqc.py    License: Apache License 2.0 6 votes vote down vote up
def batch_data(self, data, labels=None, minibatch_size=-1):
        """Split ``data`` (and optionally ``labels``) into shuffled batches.

        When ``0 < minibatch_size < len(data)`` the inputs are shuffled with
        ``aqua_globals.random_seed`` and split with ``np.array_split``;
        otherwise everything is wrapped into a single batch.

        Returns:
            ``(batches, label_batches)``. On the shuffled path
            ``label_batches`` is ``None`` when ``labels`` is ``None``.
        """
        if not 0 < minibatch_size < len(data):
            # No mini-batching requested: one batch holding everything.
            return np.asarray([data]), np.asarray([labels])

        # NOTE(review): np.array_split interprets an integer as the number
        # of sections, not the size of each section -- confirm this is the
        # intended meaning of ``minibatch_size``.
        sections = min(minibatch_size, len(data))
        seed = aqua_globals.random_seed
        if labels is None:
            label_batches = None
            shuffled_samples = shuffle(data, random_state=seed)
        else:
            shuffled_samples, shuffled_labels = shuffle(data, labels,
                                                        random_state=seed)
            label_batches = np.array_split(shuffled_labels, sections)
        return np.array_split(shuffled_samples, sections), label_batches
Example 18
Project: models   Author: chainer   File: train.py    License: MIT License 6 votes vote down vote up
def run_epoch():
    """Run one pass over the shuffled training data.

    For every mini-batch the model output is fed to both heads and the
    result to ``compute_loss_fct``; the global ``n_updates`` counter is
    incremented, and ``log()`` is called at selected update counts while
    ``n_epochs == 0``.
    """
    global n_updates
    log_steps = [1000, 2000, 4000, 8000, 16000, 32000]
    shuffled = shuffle(trX, trM, trYt, random_state=np.random)
    batches = iter_data(*shuffled, n_batch=n_batch_train,
                        truncate=True, verbose=True)
    for xmb, mmb, ymb in batches:
        XMB = model.xp.asarray(xmb)
        YMB = model.xp.asarray(ymb)
        MMB = model.xp.asarray(mmb)
        hidden = model(XMB)
        lm_logits = lm_head(hidden)
        clf_logits = clf_head(hidden, XMB)
        compute_loss_fct(XMB, YMB, MMB, clf_logits, lm_logits)
        n_updates += 1
        if n_updates in log_steps and n_epochs == 0:
            log()
Example 19
Project: blow   Author: joansj   File: classify.py    License: Apache License 2.0 5 votes vote down vote up
def batch_loop(e, r, x, y, eval):
    """Run one epoch over the indices ``r`` in chunks of ``sbatch``.

    In evaluation mode (``eval`` truthy) the model is switched to eval and
    the index order is kept; otherwise the model is switched to train, the
    indices are shuffled and an optimizer step is taken per chunk.
    Progress is printed after every chunk.

    Returns:
        ``(losses, predictions)``: per-sample losses and the index of the
        maximum along dimension 1 of the model output, as flat lists.
    """
    if eval:
        model.eval()
    else:
        model.train()
        r = shuffle(r)

    losses = []
    predictions = []
    for start in range(0, len(r), sbatch):
        # Slicing clamps at the end, so the last chunk may be shorter.
        chunk = torch.LongTensor(r[start:start + sbatch])
        xb = x[chunk, :].to(args.device)
        yb = y[chunk].to(args.device)
        ybhat = model.forward(xb)
        loss = loss_function(ybhat, yb)
        losses.extend(loss.data.cpu().numpy())
        predictions.extend(ybhat.data.max(1)[1].cpu().numpy())
        if not eval:
            loss = loss.mean()
            optim.zero_grad()
            loss.backward()
            optim.step()
        progress = 100 * len(losses) / len(x)
        print('\rEpoch {:03d}/{:03d} - {:5.1f}% : loss = {:7.3f}'.format(e + 1, nepochs, progress, np.mean(losses)), end='')
    return losses, predictions
Example 20
Project: malss   Author: canard0328   File: data.py    License: MIT License 5 votes vote down vote up
def fit_transform(self, X, y=None):
        """Copy the inputs onto the instance and run the preprocessing steps.

        A NumPy ``X`` is wrapped in a DataFrame (and ``y`` in a Series);
        anything else is deep-copied, with a non-Series ``y`` reduced to its
        first column. The stored data is then imputed, encoded, optionally
        standardized (``self.standardize``) and optionally shuffled
        (``self.shuffle``).

        Raises:
            ValueError: if the stored ``X`` is not a DataFrame, or if ``X``
                and ``y`` differ in length.
        """
        has_target = y is not None

        if isinstance(X, np.ndarray):
            self.X = pd.DataFrame(X)
            if has_target:
                self.y = pd.Series(y)
        else:
            self.X = X.copy(deep=True)
            if has_target:
                if isinstance(y, pd.Series):
                    self.y = y.copy(deep=True)
                else:
                    # Convert Dataframe to Series
                    self.y = y.iloc[:, 0]

        if not isinstance(self.X, pd.DataFrame):
            raise ValueError(f'{type(X)} is not supported')
        if has_target and len(X) != len(y):
            raise ValueError(('Found input variables with inconsistent '
                             f'numbers of samples: [{len(X)}, {len(y)}]'))
        self.shape_before = self.X.shape

        self.X, self.col_was_null = self.__impute(self.X)

        # Encoders are reset before encoding.
        self._label_encoder = None
        self._onehot_encoder = None
        self.X, self.del_columns = self.__encode(self.X)

        self._standardizer = None
        if self.standardize:
            self.X = self.__standardize(self.X)

        if self.shuffle:
            seed = self.random_state
            if self.y is None:
                self.X = sk_shuffle(self.X, random_state=seed)
            else:
                self.X, self.y = sk_shuffle(self.X, self.y,
                                            random_state=seed)
Example 21
Project: cloudless   Author: BradNeuberg   File: prepare_data.py    License: Apache License 2.0 5 votes vote down vote up
def _split_data_sets(details):
    """
    Shuffles and splits our datasets into training and validation sets.

    ``details`` must provide the keys ``"image_paths"`` and ``"targets"``.
    Both are shuffled with a fixed seed and handed to ``train_test_split``
    with an 80/20 split (also seeded with 0); its result is returned as is.
    """
    image_paths = details["image_paths"]
    targets = details["targets"]

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print("\tShuffling data...")
    image_paths, targets = shuffle(image_paths, targets, random_state=0)

    print("\tSplitting data 80% training, 20% validation...")
    return train_test_split(image_paths, targets, train_size=0.8,
                            test_size=0.2, random_state=0)
Example 22
Project: libact   Author: ntucllab   File: test_hierarchical_sampling.py    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
def setUp(self):
        """Load iris, shuffle it with a fixed seed and keep it as lists.

        Stores the features in ``self.X``, the targets in ``self.y`` and the
        distinct target values in ``self.classes``.
        """
        iris = datasets.load_iris()
        features, targets = shuffle(iris.data, iris.target, random_state=1126)
        self.X, self.y = features.tolist(), targets.tolist()
        self.classes = list(set(self.y))
Example 23
Project: EvolutionaryGAN   Author: WANG-Chaoyue   File: data_utils.py    License: MIT License 5 votes vote down vote up
def shuffle(*arrays, **options):
    """Shuffle ``arrays``, dispatching on the type of the first element.

    If the first element of the first array is a string, the arrays go to
    ``list_shuffle``; otherwise they go to ``skutils.shuffle`` seeded with
    ``np_rng``. ``**options`` is accepted but not used.
    """
    # ``basestring`` exists only on Python 2; fall back to ``str`` so the
    # type check no longer raises NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(arrays[0][0], string_types):
        return list_shuffle(*arrays)
    return skutils.shuffle(*arrays, random_state=np_rng)
Example 24
Project: DeepLearning-IDS   Author: rambasnet   File: keras-theano.py    License: MIT License 5 votes vote down vote up
def loadData(fileName):
    """Load ``fileName`` from ``dataPath``, using a pickle cache beside it.

    If ``<file>.pickle`` exists it is read and returned. Otherwise the CSV
    is read, rows with missing values are dropped, the frame is shuffled,
    written to the pickle cache and returned.
    """
    sourcePath = os.path.join(dataPath, fileName)
    cachePath = '{}.pickle'.format(sourcePath)
    if os.path.exists(cachePath):
        return pd.read_pickle(cachePath)

    frame = shuffle(pd.read_csv(sourcePath).dropna())
    frame.to_pickle(cachePath)
    return frame
Example 25
Project: DeepLearning-IDS   Author: rambasnet   File: keras-tensorflow.py    License: MIT License 5 votes vote down vote up
def loadData(fileName):
    """Return the data set ``fileName`` (relative to ``dataPath``).

    A cached ``<file>.pickle`` is preferred when present. Otherwise the CSV
    is parsed, rows containing missing values are removed, the rows are
    shuffled, and the result is cached as a pickle before being returned.
    """
    csvPath = os.path.join(dataPath, fileName)
    picklePath = '{}.pickle'.format(csvPath)
    if not os.path.exists(picklePath):
        cleaned = pd.read_csv(csvPath).dropna()
        cleaned = shuffle(cleaned)
        cleaned.to_pickle(picklePath)
        return cleaned
    return pd.read_pickle(picklePath)
Example 26
Project: DeepLearning-IDS   Author: rambasnet   File: fastai-expriments.py    License: MIT License 5 votes vote down vote up
def loadData(fileName):
    """Read ``fileName`` under ``dataPath`` through a pickle cache.

    On a cache hit (``<file>.pickle`` exists) the pickle is loaded. On a
    miss the CSV is loaded, NaN rows are dropped, the frame is shuffled and
    stored as the pickle for the next call.
    """
    location = os.path.join(dataPath, fileName)
    cached = '{}.pickle'.format(location)
    cacheHit = os.path.exists(cached)
    if cacheHit:
        table = pd.read_pickle(cached)
    else:
        table = pd.read_csv(location)
        table = shuffle(table.dropna())
        table.to_pickle(cached)
    return table
Example 27
Project: DeepLearning-IDS   Author: rambasnet   File: fastai-expriments.py    License: MIT License 5 votes vote down vote up
def experimentIndividual(dataFile, epochs=5, normalize=False):
    """Run 5-fold stratified cross-validation of a tabular learner.

    For every fold a learner with layers ``[200, 100]`` is fitted for
    ``epochs`` epochs, saved, and validated. After each fold the running
    mean and standard deviation of the accuracies so far are appended to
    ``<resultPath>/<dataFile>.result``.
    """
    procs = [FillMissing, Categorify] + ([Normalize] if normalize else [])

    seed = 7
    np.random.seed(seed)
    data = loadData(dataFile)

    # 5-fold stratified split, seeded for repeatability.
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    cvscores = []
    folds = kfold.split(data.index, data[dep_var])
    for fold, (train_idx, test_idx) in enumerate(folds, start=1):
        print('running fold = ', fold)

        # Data bunch restricted to this fold's train/validation indices.
        fold_items = TabularList.from_df(data, path=dataPath, cat_names=cat_names, cont_names=cont_names, procs=procs)
        data_fold = (fold_items
                     .split_by_idxs(train_idx, test_idx)
                     .label_from_df(cols=dep_var)
                     .databunch())

        learner = tabular_learner(
            data_fold, layers=[200, 100], metrics=accuracy, callback_fns=ShowGraph)
        learner.fit(epochs, 1e-2)
        learner.save('{}.model'.format(os.path.basename(dataFile)))

        loss, acc = learner.validate()
        print('loss {}: accuracy: {:.2f}%'.format(loss, acc*100))
        cvscores.append(acc*100)

        # Running summary is appended once per fold.
        resultFile = os.path.join(resultPath, dataFile)
        summary = 'accuracy: {:.2f} std-dev: {:.2f}\n'.format(np.mean(cvscores), np.std(cvscores))
        with open('{}.result'.format(resultFile), 'a') as fout:
            fout.write(summary)
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_common.py    License: MIT License 5 votes vote down vote up
def test_sample_order_invariance(name):
    """The metric ``name`` must give the same value after shuffling samples.

    Random binary targets and predictions of length 20 are shuffled
    together and the metric is evaluated on both orderings.
    """
    rng = check_random_state(0)
    y_true = rng.randint(0, 2, size=(20, ))
    y_pred = rng.randint(0, 2, size=(20, ))
    y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred, random_state=0)

    with ignore_warnings():
        metric = ALL_METRICS[name]
        unshuffled_value = metric(y_true, y_pred)
        shuffled_value = metric(y_true_shuffle, y_pred_shuffle)
        assert_allclose(unshuffled_value, shuffled_value,
                        err_msg="%s is not sample order invariant" % name)
Example 29
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_common.py    License: MIT License 5 votes vote down vote up
def test_sample_order_invariance_multilabel_and_multioutput():
    """Multilabel/multioutput metrics must not depend on sample order.

    Random (20, 25) targets, predictions and scores are shuffled together;
    every metric in the three metric groups is evaluated on the original
    and on the shuffled ordering and the values compared.
    """
    rng = check_random_state(0)

    y_true = rng.randint(0, 2, size=(20, 25))
    y_pred = rng.randint(0, 2, size=(20, 25))
    y_score = rng.normal(size=y_true.shape)

    y_true_shuffle, y_pred_shuffle, y_score_shuffle = shuffle(
        y_true, y_pred, y_score, random_state=0)

    def check(name, second, second_shuffle):
        # Metric on original vs. shuffled ordering of (y_true, second).
        metric = ALL_METRICS[name]
        assert_allclose(metric(y_true, second),
                        metric(y_true_shuffle, second_shuffle),
                        err_msg="%s is not sample order invariant" % name)

    for name in MULTILABELS_METRICS:
        check(name, y_pred, y_pred_shuffle)

    for name in THRESHOLDED_MULTILABEL_METRICS:
        check(name, y_score, y_score_shuffle)

    for name in MULTIOUTPUT_METRICS:
        check(name, y_score, y_score_shuffle)
        check(name, y_pred, y_pred_shuffle)
Example 30
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_utils.py    License: MIT License 5 votes vote down vote up
def test_shuffle_on_ndim_equals_three():
    """shuffle() must accept a 3-dimensional array without raising."""
    def to_tuple(A):
        # Nested tuples make the inner arrays hashable.
        return tuple(tuple(map(tuple, B)) for B in A)

    A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # A.shape = (2,2,2)
    before = set(to_tuple(A))
    shuffle(A)  # shouldn't raise a ValueError for dim = 3
    assert_equal(set(to_tuple(A)), before)