Python sklearn.preprocessing.Normalizer() Examples

The following are 30 code examples of sklearn.preprocessing.Normalizer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing , or try the search function

Example #1

Source File: train.py From skorch with BSD 3-Clause "New" or "Revised" License

7 votes

def get_model(with_pipeline=False):
    """Get a multi-layer perceptron model.

    Optionally, put it in a pipeline that scales the data.

    """
    model = NeuralNetClassifier(MLPClassifier)
    if with_pipeline:
        model = Pipeline([
            ('scale', FeatureUnion([
                ('minmax', MinMaxScaler()),
                ('normalize', Normalizer()),
            ])),
            ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
            ('net', model),
        ])
    return model

Example #2

Source File: bow.py From broca with MIT License

6 votes

def __init__(self, min_df=1, max_df=0.9, tokenizer=LemmaTokenizer, hash=False):
        """
        `min_df` is set to filter out extremely rare words,
        since we don't want those to dominate the distance metric.

        `max_df` is set to filter out extremely common words,
        since they don't convey much information.
        """

        # Wrap the specified tokenizer
        t = Tokenizer(tokenizer())

        if hash:
            vectr = HashingVectorizer(input='content', stop_words='english', lowercase=True, tokenizer=t)
        else:
            vectr = CountVectorizer(input='content', stop_words='english', lowercase=True, tokenizer=t, min_df=min_df, max_df=max_df)

        args = [
            ('vectorizer', vectr),
            ('tfidf', TfidfTransformer(norm=None, use_idf=True, smooth_idf=True)),
            ('normalizer', Normalizer(copy=False))
        ]

        self.pipeline = Pipeline(args)
        self.trained = False

Example #3

Source File: gp_repurposer.py From xfer with Apache License 2.0

6 votes

def __init__(self, source_model: mx.mod.Module, feature_layer_names, context_function=mx.context.cpu, num_devices=1,
                 max_function_evaluations=100, apply_l2_norm=False):
        # Call base class constructor with parameters required for meta-models
        super().__init__(source_model, feature_layer_names, context_function, num_devices)
        self.max_function_evaluations = max_function_evaluations
        self.apply_l2_norm = apply_l2_norm

        # Mean of features to use for normalization. Computed in training phase.
        # Used to normalize features in training and in prediction.
        self.feature_mean = None

        # Optimizer to use for training GP model
        self.optimizer = 'lbfgs'

        # Number of inducing points to use for sparse GP
        self.NUM_INDUCING_SPARSE_GP = 100

        # Normalizer to use when apply_l2_norm flag is set
        self.l2_normalizer = Normalizer(norm='l2')

Example #4

Source File: test_scikit.py From pliers with BSD 3-Clause "New" or "Revised" License

6 votes

def test_within_pipeline():
    pytest.importorskip('cv2')
    pytest.importorskip('sklearn')
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import Normalizer
    stim = join(get_test_data_path(), 'image', 'apple.jpg')
    graph = Graph([BrightnessExtractor(), SharpnessExtractor()])
    trans = PliersTransformer(graph)
    normalizer = Normalizer()
    pipeline = Pipeline([('pliers', trans), ('normalizer', normalizer)])
    res = pipeline.fit_transform(stim)
    assert res.shape == (1, 2)
    assert np.isclose(res[0][0], 0.66393, 1e-5)
    assert np.isclose(res[0][1], 0.74780, 1e-5)
    meta = trans.metadata_
    assert 'onset' in meta.columns
    assert meta['class'][0] == 'ImageStim'

Example #5

Source File: test_normalizer.py From coremltools with BSD 3-Clause "New" or "Revised" License

6 votes

def test_random(self):
        # Generate some random data_imputeValue.multiArrayValue[i]
        X = _np.random.random(size=(50, 3))

        for param in ("l1", "l2", "max"):
            cur_model = Normalizer(norm=param)

            output = cur_model.fit_transform(X)

            spec = converter.convert(cur_model, ["a", "b", "c"], "out")

            evaluate_transformer(
                spec,
                [dict(zip(["a", "b", "c"], row)) for row in X],
                [{"out": row} for row in output],
            )

Example #6

Source File: test_one_hot_encoder.py From coremltools with BSD 3-Clause "New" or "Revised" License

6 votes

def test_boston_OHE_pipeline(self):
        data = load_boston()

        for categorical_features in [[3], [8], [3, 8], [8, 3]]:
            # Put it in a pipeline so that we can test whether the output dimension
            # handling is correct.

            model = Pipeline(
                [
                    ("OHE", OneHotEncoder(categorical_features=categorical_features)),
                    ("Normalizer", Normalizer()),
                ]
            )

            model.fit(data.data.copy(), data.target)

            # Convert the model
            spec = sklearn.convert(model, data.feature_names, "out").get_spec()

            input_data = [dict(zip(data.feature_names, row)) for row in data.data]
            output_data = [{"out": row} for row in model.transform(data.data.copy())]

            result = evaluate_transformer(spec, input_data, output_data)

            assert result["num_errors"] == 0

Example #7

Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License

6 votes

def test_kneighbors_with_or_without_self_hit(LSH: callable, metric, n_jobs, verbose):
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors(return_distance=True)
    neigh_dist_self, neigh_ind_self = lsh.kneighbors(X, return_distance=True)

    ind_only = lsh.kneighbors(return_distance=False)
    ind_only_self = lsh.kneighbors(X, return_distance=False)

    assert_array_equal(neigh_ind, ind_only)
    assert_array_equal(neigh_ind_self, ind_only_self)

    assert (neigh_ind - neigh_ind_self).mean() <= .01, f'More than 1% of neighbors mismatch'
    assert ((neigh_dist - neigh_dist_self) < 0.0001).mean() <= 0.01,\
        f'Not almost equal to 4 decimals in more than 1% of neighbor slots'

Example #8

Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License

6 votes

def test_radius_neighbors_with_or_without_self_hit(LSH, metric, n_jobs, verbose):
    X, y = make_classification()
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)
    radius = lsh.kneighbors(n_candidates=3)[0][:, 2].max()
    neigh_dist, neigh_ind = lsh.radius_neighbors(return_distance=True, radius=radius)
    neigh_dist_self, neigh_ind_self = lsh.radius_neighbors(X, return_distance=True, radius=radius)

    ind_only = lsh.radius_neighbors(return_distance=False, radius=radius)
    ind_only_self = lsh.radius_neighbors(X, return_distance=False, radius=radius)

    assert len(neigh_ind) == len(neigh_ind_self) == len(neigh_dist) == len(neigh_dist_self)
    for i in range(len(neigh_ind)):
        assert_array_equal(neigh_ind[i], ind_only[i])
        assert_array_equal(neigh_ind_self[i], ind_only_self[i])

        assert_array_equal(neigh_ind[i][:3],
                           neigh_ind_self[i][1:4])
        assert_array_almost_equal(neigh_dist[i][:3],
                                  neigh_dist_self[i][1:4])

Example #9

Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License

6 votes

def test_squared_euclidean_same_neighbors_as_euclidean(LSH):
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric='minkowski')
    lsh.fit(X, y)
    neigh_dist_eucl, neigh_ind_eucl = lsh.kneighbors()

    lsh_sq = LSH(metric='sqeuclidean')
    lsh_sq.fit(X, y)
    neigh_dist_sqeucl, neigh_ind_sqeucl = lsh_sq.kneighbors()

    assert_array_equal(neigh_ind_eucl, neigh_ind_sqeucl)
    assert_array_almost_equal(neigh_dist_eucl ** 2, neigh_dist_sqeucl)

    if LSH in LSH_WITH_RADIUS:
        radius = neigh_dist_eucl[:, 2].max()
        rad_dist_eucl, rad_ind_eucl = lsh.radius_neighbors(radius=radius)
        rad_dist_sqeucl, rad_ind_sqeucl = lsh_sq.radius_neighbors(radius=radius**2)
        for i in range(len(rad_ind_eucl)):
            assert_array_equal(rad_ind_eucl[i], rad_ind_sqeucl[i])
            assert_array_almost_equal(rad_dist_eucl[i] ** 2, rad_dist_sqeucl[i])

Example #10

Source File: models.py From ntua-slp-semeval2018 with MIT License

6 votes

def nbow_model(task, embeddings, word2idx):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0,
                                  class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    embeddings_features = NBOWVectorizer(aggregation=["mean"],
                                         embeddings=embeddings,
                                         word2idx=word2idx,
                                         stopwords=False)

    model = Pipeline([
        ('embeddings-feats', embeddings_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model

Example #11

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_make_column_transformer_kwargs():
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']),
                                 n_jobs=3, remainder='drop',
                                 sparse_threshold=0.5)
    assert_equal(ct.transformers, make_column_transformer(
        (scaler, 'first'), (norm, ['second'])).transformers)
    assert_equal(ct.n_jobs, 3)
    assert_equal(ct.remainder, 'drop')
    assert_equal(ct.sparse_threshold, 0.5)
    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_column_transformer, (scaler, 'first'), (norm, ['second']),
        transformer_weights={'pca': 10, 'Transf': 1}
    )

Example #12

Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)

Example #13

Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License

5 votes

def test_falconn_parallel():
    X, y = make_classification(random_state=346)
    X = Normalizer().fit_transform(X)
    lsh = FalconnLSH(n_jobs=1)
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors()

    lsh_parallel = FalconnLSH(n_jobs=4)
    lsh_parallel.fit(X, y)
    neigh_dist_parallel, neigh_ind_parallel = lsh_parallel.kneighbors()

    assert_array_equal(neigh_ind, neigh_ind_parallel)
    assert_array_almost_equal(neigh_dist, neigh_dist_parallel)

Example #14

Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License

5 votes

def test_warn_on_invalid_metric(LSH, metric):
    X, y = make_classification(random_state=24643)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric='euclidean')
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors()

    lsh.metric = metric
    with pytest.warns(UserWarning):
        lsh.fit(X, y)
    neigh_dist_inv, neigh_ind_inv = lsh.kneighbors()

    assert_array_equal(neigh_ind, neigh_ind_inv)
    assert_array_almost_equal(neigh_dist, neigh_dist_inv)

Example #15

Source File: pipeline_builder.py From texta with GNU General Public License v3.0

5 votes

def build(self, fields):
        """ Build model Pipeline and Grid Search params
        """
        params = {}
        # Field transform pipeline per field + params
        transformer_list = []

        for field in fields:
            pipe_key = 'pipe_{}'.format(field)
            steps = []    
            steps.append(tuple(['selector', ItemSelector(key=field)]))
            steps.append(self.extractor_list[self.extractor_op].get_step())
            steps.append(self.reductor_list[self.reductor_op].get_step())
            steps.append(self.normalizer_list[self.normalizer_op].get_step())
            transformer_list.append(tuple([pipe_key, Pipeline(steps)]))
            # Nest params inside the union field - Extractor
            p_dict = self.extractor_list[self.extractor_op].get_param()
            for k in p_dict:
                new_k = '{}__{}__{}'.format('union', pipe_key, k)
                params[new_k] = p_dict[k]
            # Nest params inside the union field - Reductor
            p_dict = self.reductor_list[self.reductor_op].get_param()
            for k in p_dict:
                new_k = '{}__{}__{}'.format('union', pipe_key, k)
                params[new_k] = p_dict[k]
            # Nest params inside the union field - Normalizer
            p_dict = self.normalizer_list[self.normalizer_op].get_param()
            for k in p_dict:
                new_k = '{}__{}__{}'.format('union', pipe_key, k)
                params[new_k] = p_dict[k]

        # Classifier pipeline + params
        steps = []
        steps.append(tuple(['union', FeatureUnion(transformer_list=transformer_list)]))
        steps.append(self.classifier_list[self.classifier_op].get_step())
        pipe = Pipeline(steps)
        params.update(self.classifier_list[self.classifier_op].get_param())
        return pipe, params

Example #16

Source File: models.py From ntua-slp-semeval2018 with MIT License

5 votes

def bow_model(task, max_features=10000):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0,
                                  class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    word_features = TfidfVectorizer(ngram_range=(1, 1),
                                    tokenizer=lambda x: x,
                                    analyzer='word',
                                    min_df=5,
                                    # max_df=0.9,
                                    lowercase=False,
                                    use_idf=True,
                                    smooth_idf=True,
                                    max_features=max_features,
                                    sublinear_tf=True)

    model = Pipeline([
        ('bow-feats', word_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model

Example #17

Source File: sklearn.py From datastories-semeval2017-task4 with MIT License

5 votes

def nbow_model(task, embeddings, word2idx):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0,
                                  class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    embeddings_features = NBOWVectorizer(aggregation=["mean"],
                                         embeddings=embeddings,
                                         word2idx=word2idx,
                                         stopwords=False)

    preprocessor = TextPreProcessor(
        backoff=['url', 'email', 'percent', 'money', 'phone', 'user', 'time',
                 'url',
                 'date', 'number'],
        include_tags={"hashtag", "allcaps", "elongated", "repeated",
                      'emphasis',
                      'censored'},
        fix_html=True,
        segmenter="twitter",
        corrector="twitter",
        unpack_hashtags=True,
        unpack_contractions=True,
        spell_correct_elong=False,
        tokenizer=SocialTokenizer(lowercase=True).tokenize,
        dicts=[emoticons])

    model = Pipeline([
        ('preprocess', CustomPreProcessor(preprocessor, to_list=True)),
        ('embeddings-feats', embeddings_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model

Example #18

Source File: data_manipulator.py From LSTM_Anomaly_Detector with MIT License

5 votes

def normalize(mat):
    return Normalizer(norm='l2').fit_transform(mat)

Example #19

Source File: test_sklearn_normalizer_converter.py From sklearn-onnx with MIT License

5 votes

def test_model_normalizer(self):
        model = Normalizer(norm="l2")
        model_onnx = convert_sklearn(
            model,
            "scikit-learn normalizer",
            [("input", Int64TensorType([None, 1]))],
        )
        self.assertTrue(model_onnx is not None)
        self.assertTrue(len(model_onnx.graph.node) == 1)

Example #20

Source File: test_sklearn_normalizer_converter.py From sklearn-onnx with MIT License

5 votes

def test_model_normalizer_float(self):
        model = Normalizer(norm="l2")
        model_onnx = convert_sklearn(
            model,
            "scikit-learn normalizer",
            [("input", FloatTensorType([None, 3]))],
        )
        self.assertTrue(model_onnx is not None)
        self.assertTrue(len(model_onnx.graph.node) == 1)
        dump_data_and_model(
            numpy.array([[1, 1, 3], [3, 1, 2]], dtype=numpy.float32),
            model,
            model_onnx,
            basename="SklearnNormalizerL2-SkipDim1",
        )

Example #21

Source File: bnn_classifier.py From xfer with Apache License 2.0

5 votes

def __init__(self, model: gluon.nn.Sequential, var_posterior: VariationalPosterior,
                 normalizer: Normalizer):

        self.model = model
        self.var_posterior = var_posterior
        self.normalizer = normalizer

Example #22

Source File: bnn_repurposer.py From xfer with Apache License 2.0

5 votes

def __init__(self, source_model: mx.mod.Module, feature_layer_names, context_function=mx.cpu, num_devices=1,
                 bnn_context_function=mx.cpu, sigma=100.0, num_layers=1, n_hidden=10, num_samples_mc=3,
                 learning_rate=1e-3, batch_size=20, num_epochs=200, start_annealing=None, end_annealing=None,
                 num_samples_mc_prediction=100, verbose=0):

        # Call base class constructor with parameters required for meta-models
        super().__init__(source_model, feature_layer_names, context_function, num_devices)

        # Initialize BNN specific parameters
        self.sigma = sigma
        self.num_layers = num_layers
        self.n_hidden = n_hidden
        self.num_samples_mc = num_samples_mc
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.num_samples_mc_prediction = num_samples_mc_prediction
        self.verbose = verbose

        self.start_annealing = start_annealing
        self.end_annealing = end_annealing
        self.step_annealing_sample_weight = 1.0 / float(self.end_annealing - self.start_annealing)
        self.annealing_weight = 0.0

        # Initialize variables to track performance
        self.train_acc = []
        self.test_acc = []
        self.moving_loss_total = []
        self.current_loss_total = []
        self.average_loss = []
        self.anneal_weights = []

        # L2 normalization of the features
        self.normalizer = Normalizer(norm='l2')

        self.bnn_context_function = bnn_context_function
        self._context_bnn = self.bnn_context_function()

        # init parameters for constructing network to None. These will be set during repurposing
        self.dim_input = None
        self.num_classes = None

Example #23

Source File: sklearn_example.py From Hunch with Apache License 2.0

5 votes

def train(self, training_data_X, training_data_Y):
        self.normalizer = Normalizer()
        self.svc = svm.SVC(gamma=0.001, C=100.)
        normalised_training_data_X = self.normalizer.fit_transform(training_data_X)
        self.svc.fit(normalised_training_data_X, training_data_Y)

Example #24

Source File: preprocessing.py From open-solution-toxic-comments with MIT License

5 votes

def __init__(self):
        self.normalizer = sk_prep.Normalizer()

Example #25

Source File: test_feature_optimization.py From hyperparameter_hunter with MIT License

5 votes

def normalize(train_inputs, non_train_inputs):
    normalizer = Normalizer()
    train_inputs[train_inputs.columns] = normalizer.fit_transform(train_inputs.values)
    non_train_inputs[train_inputs.columns] = normalizer.transform(non_train_inputs.values)
    return train_inputs, non_train_inputs

Example #26

Source File: topic.py From Python-DevOps with MIT License

5 votes

def train_lsa(corpus,n_topics, max_df=0.95, min_df=2,cleaning=clearstring,stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)): corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(max_df = max_df, min_df = min_df, stop_words = stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    tfidf = Normalizer().fit_transform(tfidf)
    lsa = TruncatedSVD(n_topics).fit(tfidf)
    return TOPIC(tfidf_features,lsa)

Example #27

Source File: misc.py From steppy-toolkit with MIT License

5 votes

def __init__(self):
        super().__init__()
        self.normalizer = Normalizer()

Example #28

Source File: main.py From AutoOut with MIT License

5 votes

def data_cleaning_formatting(X):
    # Basic cleaning
    X = X.fillna(0)
    X = X.fillna('ffill')

    # Encode data
    X = encode_data(X)
    X = Normalizer().fit_transform(X)
    return X

Example #29

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_make_column_transformer():
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']))
    names, transformers, columns = zip(*ct.transformers)
    assert_equal(names, ("standardscaler", "normalizer"))
    assert_equal(transformers, (scaler, norm))
    assert_equal(columns, ('first', ['second']))

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        ct1 = make_column_transformer(([0], norm))
    ct2 = make_column_transformer((norm, [0]))
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    assert_almost_equal(ct1.fit_transform(X_array),
                        ct2.fit_transform(X_array))

    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        make_column_transformer(('first', 'drop'))

    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        make_column_transformer(('passthrough', 'passthrough'),
                                ('first', 'drop'))

Example #30

Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def test_make_column_transformer_pandas():
    pd = pytest.importorskip('pandas')
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    X_df = pd.DataFrame(X_array, columns=['first', 'second'])
    norm = Normalizer()
    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        ct1 = make_column_transformer((X_df.columns, norm))
    ct2 = make_column_transformer((norm, X_df.columns))
    assert_almost_equal(ct1.fit_transform(X_df),
                        ct2.fit_transform(X_df))