Python imblearn.over_sampling.ADASYN Examples

The following are 5 code examples of imblearn.over_sampling.ADASYN(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module imblearn.over_sampling , or try the search function

Example #1

Source File: imblearn_resampling_example.py From hyperparameter_hunter with MIT License

6 votes

def over_sample_ADASYN(train_inputs, train_targets):
    sampler = ADASYN(random_state=32)
    train_inputs, train_targets = _sampler_helper(sampler, train_inputs, train_targets)
    return train_inputs, train_targets

Example #2

Source File: adasyn.py From lale with Apache License 2.0

5 votes

def __init__(self, operator = None, sampling_strategy='auto', random_state=None, n_neighbors=5, n_jobs=1):
        if operator is None:
            raise ValueError("Operator is a required argument.")

        self._hyperparams = {
            'sampling_strategy': sampling_strategy,
            'random_state': random_state,
            'n_neighbors': n_neighbors,
            'n_jobs': n_jobs}
    
        resampler_instance = OrigModel(**self._hyperparams)
        super(ADASYNImpl, self).__init__(
            operator = operator,
            resampler = resampler_instance)

Example #3

Source File: test_imbalance.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_objectmapper_oversampling(self):
        import imblearn.over_sampling as os
        df = pdml.ModelFrame([])
        self.assertIs(df.imbalance.over_sampling.ADASYN,
                      os.ADASYN)
        self.assertIs(df.imbalance.over_sampling.RandomOverSampler,
                      os.RandomOverSampler)
        self.assertIs(df.imbalance.over_sampling.SMOTE,
                      os.SMOTE)

Example #4

Source File: test_imbalance.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_sample(self):
        from imblearn.under_sampling import ClusterCentroids, OneSidedSelection
        from imblearn.over_sampling import ADASYN, SMOTE
        from imblearn.combine import SMOTEENN, SMOTETomek

        models = [ClusterCentroids, OneSidedSelection, ADASYN, SMOTE,
                  SMOTEENN, SMOTETomek]

        X = np.random.randn(100, 5)
        y = np.array([0, 1]).repeat([80, 20])

        df = pdml.ModelFrame(X, target=y, columns=list('ABCDE'))

        for model in models:
            mod1 = model(random_state=self.random_state)
            mod2 = model(random_state=self.random_state)

            df.fit(mod1)
            mod2.fit(X, y)

            result = df.fit_resample(mod1)
            expected_X, expected_y = mod2.fit_resample(X, y)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_numpy_array_equal(result.target.values, expected_y)
            tm.assert_numpy_array_equal(result.data.values, expected_X)
            tm.assert_index_equal(result.columns, df.columns)

            mod1 = model(random_state=self.random_state)
            mod2 = model(random_state=self.random_state)

            result = df.fit_sample(mod1)
            expected_X, expected_y = mod2.fit_sample(X, y)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_numpy_array_equal(result.target.values, expected_y)
            tm.assert_numpy_array_equal(result.data.values, expected_X)
            tm.assert_index_equal(result.columns, df.columns)

Example #5

Source File: oversampling.py From ecg-classification with GNU General Public License v3.0

4 votes

def perform_oversampling(oversamp_method, db_path, oversamp_features_name, tr_features, tr_labels):
    start = time.time()
    
    oversamp_features_pickle_name = db_path + oversamp_features_name + '_' + oversamp_method + '.p'
    print(oversamp_features_pickle_name)

    if True:
        print("Oversampling method:\t" + oversamp_method + " ...")
        # 1 SMOTE
        if oversamp_method == 'SMOTE':  
            #kind={'borderline1', 'borderline2', 'svm'}
            svm_model = svm.SVC(C=0.001, kernel='rbf', degree=3, gamma='auto', decision_function_shape='ovo')
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='svm', svm_estimator=svm_model, n_jobs=1)

            # PROBAR SMOTE CON OTRO KIND

        elif oversamp_method == 'SMOTE_regular_min':
            oversamp = SMOTE(ratio='minority', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='regular', svm_estimator=None, n_jobs=1)

        elif oversamp_method == 'SMOTE_regular':
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='regular', svm_estimator=None, n_jobs=1)
  
        elif oversamp_method == 'SMOTE_border':
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='borderline1', svm_estimator=None, n_jobs=1)
                 
        # 2 SMOTEENN
        elif oversamp_method == 'SMOTEENN':    
            oversamp = SMOTEENN()

        # 3 SMOTE TOMEK
        # NOTE: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.65.3904&rep=rep1&type=pdf
        elif oversamp_method == 'SMOTETomek':
            oversamp = SMOTETomek()

        # 4 ADASYN
        elif oversamp_method == 'ADASYN':
            oversamp = ADASYN(ratio='auto', random_state=None, k=None, n_neighbors=5, n_jobs=cpu_threads)
 
        tr_features_balanced, tr_labels_balanced  = oversamp.fit_sample(tr_features, tr_labels)
        # TODO Write data oversampled!
        print("Writing oversampled data at: " + oversamp_features_pickle_name + " ...")
        np.savetxt('mit_db/' + oversamp_features_name + '_DS1_labels.csv', tr_labels_balanced.astype(int), '%.0f') 
        f = open(oversamp_features_pickle_name, 'wb')
        pickle.dump(tr_features_balanced, f, 2)
        f.close

    end = time.time()

    count = collections.Counter(tr_labels_balanced)
    print("Oversampling balance")
    print(count)
    print("Time required: " + str(format(end - start, '.2f')) + " sec" )

    return tr_features_balanced, tr_labels_balanced