Python imblearn.over_sampling.ADASYN Examples

The following are 5 code examples of imblearn.over_sampling.ADASYN(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module imblearn.over_sampling , or try the search function .
Example #1
Source File: imblearn_resampling_example.py    From hyperparameter_hunter with MIT License 6 votes vote down vote up
def over_sample_ADASYN(train_inputs, train_targets):
    sampler = ADASYN(random_state=32)
    train_inputs, train_targets = _sampler_helper(sampler, train_inputs, train_targets)
    return train_inputs, train_targets 
Example #2
Source File: adasyn.py    From lale with Apache License 2.0 5 votes vote down vote up
def __init__(self, operator = None, sampling_strategy='auto', random_state=None, n_neighbors=5, n_jobs=1):
        if operator is None:
            raise ValueError("Operator is a required argument.")

        self._hyperparams = {
            'sampling_strategy': sampling_strategy,
            'random_state': random_state,
            'n_neighbors': n_neighbors,
            'n_jobs': n_jobs}
    
        resampler_instance = OrigModel(**self._hyperparams)
        super(ADASYNImpl, self).__init__(
            operator = operator,
            resampler = resampler_instance) 
Example #3
Source File: test_imbalance.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper_oversampling(self):
        import imblearn.over_sampling as os
        df = pdml.ModelFrame([])
        self.assertIs(df.imbalance.over_sampling.ADASYN,
                      os.ADASYN)
        self.assertIs(df.imbalance.over_sampling.RandomOverSampler,
                      os.RandomOverSampler)
        self.assertIs(df.imbalance.over_sampling.SMOTE,
                      os.SMOTE) 
Example #4
Source File: test_imbalance.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_sample(self):
        from imblearn.under_sampling import ClusterCentroids, OneSidedSelection
        from imblearn.over_sampling import ADASYN, SMOTE
        from imblearn.combine import SMOTEENN, SMOTETomek

        models = [ClusterCentroids, OneSidedSelection, ADASYN, SMOTE,
                  SMOTEENN, SMOTETomek]

        X = np.random.randn(100, 5)
        y = np.array([0, 1]).repeat([80, 20])

        df = pdml.ModelFrame(X, target=y, columns=list('ABCDE'))

        for model in models:
            mod1 = model(random_state=self.random_state)
            mod2 = model(random_state=self.random_state)

            df.fit(mod1)
            mod2.fit(X, y)

            result = df.fit_resample(mod1)
            expected_X, expected_y = mod2.fit_resample(X, y)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_numpy_array_equal(result.target.values, expected_y)
            tm.assert_numpy_array_equal(result.data.values, expected_X)
            tm.assert_index_equal(result.columns, df.columns)

            mod1 = model(random_state=self.random_state)
            mod2 = model(random_state=self.random_state)

            result = df.fit_sample(mod1)
            expected_X, expected_y = mod2.fit_sample(X, y)

            self.assertIsInstance(result, pdml.ModelFrame)
            tm.assert_numpy_array_equal(result.target.values, expected_y)
            tm.assert_numpy_array_equal(result.data.values, expected_X)
            tm.assert_index_equal(result.columns, df.columns) 
Example #5
Source File: oversampling.py    From ecg-classification with GNU General Public License v3.0 4 votes vote down vote up
def perform_oversampling(oversamp_method, db_path, oversamp_features_name, tr_features, tr_labels):
    start = time.time()
    
    oversamp_features_pickle_name = db_path + oversamp_features_name + '_' + oversamp_method + '.p'
    print(oversamp_features_pickle_name)

    if True:
        print("Oversampling method:\t" + oversamp_method + " ...")
        # 1 SMOTE
        if oversamp_method == 'SMOTE':  
            #kind={'borderline1', 'borderline2', 'svm'}
            svm_model = svm.SVC(C=0.001, kernel='rbf', degree=3, gamma='auto', decision_function_shape='ovo')
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='svm', svm_estimator=svm_model, n_jobs=1)

            # PROBAR SMOTE CON OTRO KIND

        elif oversamp_method == 'SMOTE_regular_min':
            oversamp = SMOTE(ratio='minority', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='regular', svm_estimator=None, n_jobs=1)

        elif oversamp_method == 'SMOTE_regular':
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='regular', svm_estimator=None, n_jobs=1)
  
        elif oversamp_method == 'SMOTE_border':
            oversamp = SMOTE(ratio='auto', random_state=None, k_neighbors=5, m_neighbors=10, out_step=0.5, kind='borderline1', svm_estimator=None, n_jobs=1)
                 
        # 2 SMOTEENN
        elif oversamp_method == 'SMOTEENN':    
            oversamp = SMOTEENN()

        # 3 SMOTE TOMEK
        # NOTE: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.65.3904&rep=rep1&type=pdf
        elif oversamp_method == 'SMOTETomek':
            oversamp = SMOTETomek()

        # 4 ADASYN
        elif oversamp_method == 'ADASYN':
            oversamp = ADASYN(ratio='auto', random_state=None, k=None, n_neighbors=5, n_jobs=cpu_threads)
 
        tr_features_balanced, tr_labels_balanced  = oversamp.fit_sample(tr_features, tr_labels)
        # TODO Write data oversampled!
        print("Writing oversampled data at: " + oversamp_features_pickle_name + " ...")
        np.savetxt('mit_db/' + oversamp_features_name + '_DS1_labels.csv', tr_labels_balanced.astype(int), '%.0f') 
        f = open(oversamp_features_pickle_name, 'wb')
        pickle.dump(tr_features_balanced, f, 2)
        f.close

    end = time.time()

    count = collections.Counter(tr_labels_balanced)
    print("Oversampling balance")
    print(count)
    print("Time required: " + str(format(end - start, '.2f')) + " sec" )

    return tr_features_balanced, tr_labels_balanced