Python sklearn.preprocessing.MaxAbsScaler() Examples

The following are 19 code examples showing how to use sklearn.preprocessing.MaxAbsScaler(). They are extracted from open source projects; the project, author, file, and license are listed above each example.

You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
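Before diving into the project examples, here is a minimal, self-contained sketch of what MaxAbsScaler does: it divides each feature by its maximum absolute value, mapping the data into [-1, 1] without centering or shifting it (the data below is illustrative, not from any of the projects).

import numpy as np
from sklearn.preprocessing import MaxAbsScaler

# Two features with different magnitudes; zeros stay zero after scaling.
X = np.array([[1.0, -10.0],
              [2.0,   0.0],
              [4.0,   5.0]])

scaler = MaxAbsScaler()
X_scaled = scaler.fit_transform(X)  # each column divided by its max |value|
print(scaler.max_abs_)  # [ 4. 10.]
print(X_scaled)
# [[ 0.25 -1.  ]
#  [ 0.5   0.  ]
#  [ 1.    0.5 ]]

Because it neither centers nor shifts the data, MaxAbsScaler preserves sparsity, a point several of the examples below rely on.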

Example 1
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaXgboost.py    License: Apache License 2.0
def test_01_xgb_classifier(self):
        print("\ntest 01 (xgb classifier with preprocessing) [multi-class]\n")
        model = XGBClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MaxAbsScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test01xgboost.pmml"
        xgboost_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 2
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaLgbm.py    License: Apache License 2.0
def test_02_lgbm_classifier(self):
        print("\ntest 02 (lgbm classifier with preprocessing) [multi-class]\n")
        model = LGBMClassifier()
        pipeline_obj = Pipeline([
            ('scaler',MaxAbsScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(self.X,self.Y)
        file_name = "test02lgbm.pmml"
        lgb_to_pmml(pipeline_obj, self.features, 'Species', file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, self.test_file)
        model_pred = pipeline_obj.predict(self.X)
        model_prob = pipeline_obj.predict_proba(self.X)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
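Examples 1 and 2 share the same pattern: placing MaxAbsScaler as the first step of a Pipeline means its per-feature maxima are learned during pipeline_obj.fit and re-applied automatically inside predict and predict_proba. A minimal sketch of that pattern on its own, with a stand-in estimator (LogisticRegression here is illustrative, not from the projects above):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MaxAbsScaler

X, y = load_iris(return_X_y=True)
pipe = Pipeline([
    ('scaler', MaxAbsScaler()),
    ('model', LogisticRegression(max_iter=1000)),
])
pipe.fit(X, y)               # fits the scaler, then the model on scaled data
print(pipe.predict(X[:3]))   # scaling is re-applied transparently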
Example 3
Project: Semantic-Texual-Similarity-Toolkits   Author: rgtjf   File: classifier.py    License: MIT License
def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)
        clf = RandomForestRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf 
Example 4
Project: Semantic-Texual-Similarity-Toolkits   Author: rgtjf   File: classifier.py    License: MIT License
def train_model(self, train_file_path, model_path):
        print("==> Load the data ...")
        X_train, Y_train = self.load_file(train_file_path)
        print(train_file_path, shape(X_train))

        print("==> Train the model ...")
        min_max_scaler = preprocessing.MaxAbsScaler()
        X_train_minmax = min_max_scaler.fit_transform(X_train)

        clf = GradientBoostingRegressor(n_estimators=self.n_estimators)
        clf.fit(X_train_minmax.toarray(), Y_train)

        print("==> Save the model ...")
        pickle.dump(clf, open(model_path, 'wb'))

        scaler_path = model_path.replace('.pkl', '.scaler.pkl')
        pickle.dump(min_max_scaler, open(scaler_path, 'wb'))
        return clf 
Example 5
Project: adviser   Author: DigitalPhonetics   File: data_utils.py    License: GNU General Public License v3.0
def normalize_cv(X, y, i, norm="zero_score"):
    X_test = X[i]
    y_test = y[i]
    X_train = pd.concat(X[:i] + X[i+1:])
    y_train = pd.concat(y[:i] + y[i+1:])
    if norm == "min_max":
        scaler = preprocessing.MinMaxScaler()
    elif norm == "max_abs":
        scaler = preprocessing.MaxAbsScaler()
    else:
        scaler = preprocessing.StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train),
                           index=y_train.index.values)
    X_train.columns = X[i].columns.values
    X_test = pd.DataFrame(scaler.transform(X_test), index=y_test.index.values)
    X_test.columns = X[i].columns.values
    return X_train, X_test, y_train, y_test 
Example 6
Project: pyts   Author: johannfaouzi   File: scaler.py    License: BSD 3-Clause "New" or "Revised" License
def transform(self, X):
        """Scale the data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Data to scale.

        Returns
        -------
        X_new : array-like, shape = (n_samples, n_timestamps)
            Scaled data.

        """
        X = check_array(X, dtype='float64')
        scaler = SklearnMaxAbsScaler()
        X_new = scaler.fit_transform(X.T).T
        return X_new 
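The transpose trick above turns scikit-learn's per-feature scaler into a per-sample one: fitting on X.T and transposing back scales each time series by its own maximum absolute value. A minimal numpy sketch of the same idea (array values are illustrative):

import numpy as np

X = np.array([[1.0, -4.0,  2.0],
              [3.0,  6.0, -6.0]])
# Per-row max-abs scaling, equivalent to SklearnMaxAbsScaler on X.T:
X_new = X / np.abs(X).max(axis=1, keepdims=True)
# Each row now lies in [-1, 1]: [[0.25, -1., 0.5], [0.5, 1., -1.]]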
Example 7
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler) 
Example 8
Project: Benchmarks   Author: ECP-CANDLE   File: reg_go2.py    License: MIT License
def load_data():

    data_path = args['in']

    df = (pd.read_csv(data_path, skiprows=1).values).astype('float32')

    df_y = df[:, 0].astype('float32')
    df_x = df[:, 1:PL].astype(np.float32)

#    scaler = MaxAbsScaler()
    scaler = StandardScaler()
    df_x = scaler.fit_transform(df_x)

    X_train, X_test, Y_train, Y_test = train_test_split(df_x, df_y, test_size=0.20, random_state=42)

    print('x_train shape:', X_train.shape)
    print('x_test shape:', X_test.shape)

    return X_train, Y_train, X_test, Y_test 
Example 9
Project: DeepLearning_IDS   Author: clazarom   File: main_dl_experiments.py    License: MIT License
def sparse_normalize_dataset(dataset):
    """ Normaliza dataset without removing the sparseness structure of the data """
    #Remove mean of dataset 
    dataset = dataset - np.mean(dataset)
    #Truncate to +/-3 standard deviations and scale to -1 to 1
    std_dev = 3 * np.std(dataset)
    dataset = np.maximum(np.minimum(dataset, std_dev), -std_dev) / std_dev
    #Rescale from [-1, 1] to [0.1, 0.9]
    dataset = (dataset + 1) * 0.4 + 0.1
    #dataset = (dataset-np.amin(dataset))/(np.amax(dataset)-np.amin(dataset))
    return dataset
    #return preprocessing.MaxAbsScaler().fit_transform(dataset) 
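The commented-out last line points at the sparse-friendly alternative: because MaxAbsScaler only divides by per-column maxima, it accepts scipy.sparse input directly and never densifies it, unlike mean removal. A minimal sketch under that assumption (the matrix values are illustrative):

import scipy.sparse as sp
from sklearn.preprocessing import MaxAbsScaler

X = sp.csr_matrix([[ 0.0, 5.0],
                   [ 2.0, 0.0],
                   [-4.0, 1.0]])
X_scaled = MaxAbsScaler().fit_transform(X)  # result is still a sparse CSR matrix
print(X_scaled.toarray())
# [[ 0.   1. ]
#  [ 0.5  0. ]
#  [-1.   0.2]]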
Example 10
Project: default-credit-card-prediction   Author: alexpnt   File: preprocessing.py    License: MIT License
def scale_by_max_value(X):
	"""
	Scale each feature by its maximum absolute value.

	Keyword arguments:
	X -- The feature vectors	
	"""

	if verbose:
		print('\nScaling to the range [-1,1] ...')

	max_abs_scaler = preprocessing.MaxAbsScaler()
	return max_abs_scaler.fit_transform(X) 
Example 11
Project: adviser   Author: DigitalPhonetics   File: data_utils.py    License: GNU General Public License v3.0
def normalize(data, norm="zero_score", scaler=None):
    """Normalize pandas Dataframe.

    @param data: Input dataframe
    @param norm: normalization method [default: zero_score standardization],
    alternatives: 'min_max', 'max_abs'
    @return datascaled: normalized dataframe
    """
    if scaler is not None:
        datascaled = pd.DataFrame(scaler.transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values
    else:
        if norm == "min_max":
            scaler = preprocessing.MinMaxScaler()
        elif norm == "max_abs":
            scaler = preprocessing.MaxAbsScaler()
        else:
            scaler = preprocessing.StandardScaler()
        datascaled = pd.DataFrame(scaler.fit_transform(data),
                                  index=data.index.values)
        datascaled.columns = data.columns.values
    return datascaled, scaler


# deprecated - use sklearn.model_selection.train_test_split instead 
Example 12
Project: mindmeld   Author: cisco   File: text_models.py    License: Apache License 2.0
def _get_feature_scaler(self):
        """Get a feature value scaler based on the model settings"""
        if self.config.model_settings is None:
            scale_type = None
        else:
            scale_type = self.config.model_settings.get("feature_scaler")
        scaler = {
            "std-dev": StandardScaler(with_mean=False),
            "max-abs": MaxAbsScaler(),
        }.get(scale_type)
        return scaler 
Example 13
Project: mindmeld   Author: cisco   File: memm.py    License: Apache License 2.0
def _get_feature_scaler(scale_type):
        """Get a feature value scaler based on the model settings"""
        scaler = {
            "std-dev": StandardScaler(with_mean=False),
            "max-abs": MaxAbsScaler(),
        }.get(scale_type)
        return scaler 
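Both mindmeld helpers use the same dict-dispatch idiom: dict.get returns None for keys outside the mapping, so an unrecognized (or unset) feature_scaler silently disables scaling rather than raising. A hypothetical call for illustration:

scaler = _get_feature_scaler('max-abs')   # -> MaxAbsScaler()
scaler = _get_feature_scaler('unknown')   # -> None, i.e. no scaling applied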
Example 14
Project: Benchmarks   Author: ECP-CANDLE   File: p1b3.py    License: MIT License
def scale(df, scaling=None):
    """Scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to scale
    scaling : 'maxabs', 'minmax', 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    if scaling is None or scaling.lower() == 'none':
        return df

    df = df.dropna(axis=1, how='any')

    # Scaling data
    if scaling == 'maxabs':
        # Normalizing -1 to 1
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler()
    else:
        # Standard normalization
        scaler = StandardScaler()

    mat = df.values  # DataFrame.as_matrix() was removed in pandas 1.0
    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
Example 15
Project: Benchmarks   Author: ECP-CANDLE   File: p1b3.py    License: MIT License
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    df = df.dropna(axis=1, how='all')

    #imputer = Imputer(strategy='mean', axis=0)
    # Imputer was removed in scikit-learn 0.22; sklearn.impute.SimpleImputer
    # is the modern equivalent.
    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
Example 16
Project: Benchmarks   Author: ECP-CANDLE   File: NCI60.py    License: MIT License
def impute_and_scale(df, scaling='std'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to impute and scale
    scaling : 'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional (default 'std')
        type of scaling to apply
    """

    df = df.dropna(axis=1, how='all')

    # Imputer was removed in scikit-learn 0.22; sklearn.impute.SimpleImputer
    # is the modern equivalent.
    imputer = Imputer(strategy='mean')
    mat = imputer.fit_transform(df)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)

    df = pd.DataFrame(mat, columns=df.columns)

    return df 
Example 17
Project: Benchmarks   Author: ECP-CANDLE   File: nt3_baseline_keras2.py    License: MIT License
def load_data(train_path, test_path, gParameters):

    print('Loading data...')
    df_train = (pd.read_csv(train_path,header=None).values).astype('float32')
    df_test = (pd.read_csv(test_path,header=None).values).astype('float32')
    print('done')

    print('df_train shape:', df_train.shape)
    print('df_test shape:', df_test.shape)

    seqlen = df_train.shape[1]

    df_y_train = df_train[:,0].astype('int')
    df_y_test = df_test[:,0].astype('int')

    Y_train = np_utils.to_categorical(df_y_train,gParameters['classes'])
    Y_test = np_utils.to_categorical(df_y_test,gParameters['classes'])

    df_x_train = df_train[:, 1:seqlen].astype(np.float32)
    df_x_test = df_test[:, 1:seqlen].astype(np.float32)

#        X_train = df_x_train.as_matrix()
#        X_test = df_x_test.as_matrix()

    X_train = df_x_train
    X_test = df_x_test

    scaler = MaxAbsScaler()
    mat = np.concatenate((X_train, X_test), axis=0)
    mat = scaler.fit_transform(mat)

    X_train = mat[:X_train.shape[0], :]
    X_test = mat[X_train.shape[0]:, :]

    return X_train, Y_train, X_test, Y_test 
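Note that this example fits the scaler on the concatenation of training and test data, which leaks test-set statistics into preprocessing. If that is not intended, the usual leakage-free variant is to fit on the training split only (a sketch, reusing the names above):

scaler = MaxAbsScaler()
X_train = scaler.fit_transform(X_train)  # learn maxima from training data only
X_test = scaler.transform(X_test)        # reuse the fitted maxima on test data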
Example 18
Project: Benchmarks   Author: ECP-CANDLE   File: data_utils.py    License: MIT License
def scale_array(mat, scaling=None):
    """ Scale data included in numpy array.
        
        Parameters
        ----------
        mat : numpy array
            Array to scale
        scaling : string
            String describing type of scaling to apply.
            Options recognized: 'maxabs', 'minmax', 'std'.
            'maxabs' : scales data to range [-1, 1].
            'minmax' : scales data to range [0, 1].
            'std'    : scales data to normal variable with mean 0 and standard deviation 1.
            (Default: None, no scaling).

        Return
        ----------
        Returns the numpy array scaled by the method specified. \
        If no scaling method is specified, it returns the numpy \
        array unmodified.
    """
    
    if scaling is None or scaling.lower() == 'none':
        return mat

    # Scaling data
    if scaling == 'maxabs':
        # Scaling to [-1, 1]
        scaler = MaxAbsScaler(copy=False)
    elif scaling == 'minmax':
        # Scaling to [0,1]
        scaler = MinMaxScaler(copy=False)
    else:
        # Standard normalization
        scaler = StandardScaler(copy=False)
    
    return scaler.fit_transform(mat) 
Example 19
Project: Benchmarks   Author: ECP-CANDLE   File: data_utils.py    License: MIT License
def drop_impute_and_scale_dataframe(df, scaling='std', imputing='mean', dropna='all'):
    """Impute missing values with mean and scale data included in pandas dataframe.

    Parameters
    ----------
    df : pandas dataframe
        dataframe to process
    scaling : string
        String describing type of scaling to apply.
        'maxabs' [-1,1], 'minmax' [0,1], 'std', or None, optional
        (Default 'std')
    imputing : string
        String describing type of imputation to apply.
        'mean' replace missing values with mean value along the column,
        'median' replace missing values with median value along the column,
        'most_frequent' replace missing values with most frequent value along column
        (Default: 'mean').
    dropna : string
        String describing strategy for handling missing values.
        'all' if all values are NA, drop that column.
        'any' if any NA values are present, drop that column.
        (Default: 'all').

    Return
    ----------
    Returns the data frame after handling missing values and scaling.

    """

    if dropna:
        df = df.dropna(axis=1, how=dropna)
    else:
        empty_cols = df.columns[df.notnull().sum() == 0]
        df[empty_cols] = 0

    if imputing is None or imputing.lower() == 'none':
        mat = df.values
    else:
#        imputer = Imputer(strategy=imputing, axis=0)
#        imputer = SimpleImputer(strategy=imputing)
        # Imputer comes from a conditional import; axis=0 was the default
        # in the old API, so it does not need to be passed explicitly.
        imputer = Imputer(strategy=imputing)
        mat = imputer.fit_transform(df.values)

    if scaling is None or scaling.lower() == 'none':
        return pd.DataFrame(mat, columns=df.columns)

    if scaling == 'maxabs':
        scaler = MaxAbsScaler()
    elif scaling == 'minmax':
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    mat = scaler.fit_transform(mat)
    df = pd.DataFrame(mat, columns=df.columns)

    return df