Python sklearn.preprocessing.Binarizer() Examples

The following are 12 code examples for showing how to use sklearn.preprocessing.Binarizer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.

Example 1
Project: graspy   Author: neurodata   File: plot.py    License: Apache License 2.0 6 votes vote down vote up
def _transform(arr, method):
    if method is not None:
        if method in ["log", "log10"]:
            # arr = np.log(arr, where=(arr > 0))
            # hacky, but np.log(arr, where=arr>0) is really buggy
            arr = arr.copy()
            if method == "log":
                arr[arr > 0] = np.log(arr[arr > 0])
            else:
                arr[arr > 0] = np.log10(arr[arr > 0])
        elif method in ["zero-boost", "simple-all", "simple-nonzero"]:
            arr = pass_to_ranks(arr, method=method)
        elif method == "binarize":
            transformer = Binarizer().fit(arr)
            arr = transformer.transform(arr)
        else:
            msg = "Transform must be one of {log, log10, binarize, zero-boost, simple-all, \
            simple-nonzero, not {}.".format(
                method
            )
            raise ValueError(msg)

    return arr 
Example 2
Project: sklearn-onnx   Author: onnx   File: _supported_operators.py    License: MIT License 6 votes vote down vote up
def get_model_alias(model_type):
    """
    Resolve the alias of a model via ``_get_sklearn_operator_name``.

    :param model_type:  A scikit-learn object (e.g., SGDClassifier
                        and Binarizer)
    :return: A string which stands for the type of the input model in
             our conversion framework
    :raises RuntimeError: if the lookup yields ``None``
    """
    alias = _get_sklearn_operator_name(model_type)
    if alias is not None:
        return alias
    template = ("Unable to find alias for model '{}'. "
                "The converter is likely missing.")
    raise RuntimeError(template.format(type(model_type)))


# registered converters 
Example 3
Project: sklearn-onnx   Author: onnx   File: test_sklearn_binarizer_converter.py    License: MIT License 6 votes vote down vote up
def test_model_binarizer(self):
        # Convert a Binarizer(threshold=0.5) with convert_sklearn, make sure
        # a model object comes back, then hand data + both models to
        # dump_data_and_model.
        binarizer = Binarizer(threshold=0.5)
        samples = np.array(
            [[1., -1., 2.], [2., 0., 0.], [0., 1., -1.]],
            dtype=np.float32)
        input_spec = [("input", FloatTensorType(samples.shape))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn binarizer", input_spec)
        self.assertTrue(onnx_model is not None)
        dump_data_and_model(
            samples, binarizer, onnx_model,
            basename="SklearnBinarizer-SkipDim1")
Example 4
Project: sklearn-onnx   Author: onnx   File: test_onnx_helper.py    License: MIT License 6 votes vote down vote up
def test_onnx_helper_load_save(self):
        # Save a converted scaler+binarizer pipeline to disk, load it back,
        # and check that both the loaded model and the model returned by
        # select_model_inputs_outputs(..., "variable") yield (2, 2) outputs.
        X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
        pipeline = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
        pipeline.fit(X)

        input_types = [("input", FloatTensorType([None, 2]))]
        converted = convert_sklearn(pipeline, "binarizer", input_types)
        onnx_path = "temp_onnx_helper_load_save.onnx"
        save_onnx_model(converted, onnx_path)

        loaded = load_onnx_model(onnx_path)
        selected = select_model_inputs_outputs(loaded, "variable")
        assert selected.graph is not None

        run_loaded = self.get_model(loaded)
        run_selected = self.get_model(selected)
        X32 = X.astype(numpy.float32)
        out_loaded = run_loaded(X32)
        out_selected = run_selected(X32)
        assert out_loaded.shape == (2, 2)
        assert out_selected.shape == (2, 2)
Example 5
Project: sklearn-onnx   Author: onnx   File: test_onnx_helper.py    License: MIT License 6 votes vote down vote up
def test_onnx_helper_load_save_init(self):
        # Same save/load round trip as a three-step pipeline
        # (Binarizer -> OneHotEncoder -> StandardScaler); both the loaded
        # model and the model returned by
        # select_model_inputs_outputs(..., "variable") must yield (4, 2).
        X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
        steps = (
            Binarizer(),
            OneHotEncoder(sparse=False, handle_unknown='ignore'),
            StandardScaler(),
        )
        pipeline = make_pipeline(*steps)
        pipeline.fit(X)

        input_types = [("input", FloatTensorType([None, 2]))]
        converted = convert_sklearn(pipeline, "pipe3", input_types)
        onnx_path = "temp_onnx_helper_load_save.onnx"
        save_onnx_model(converted, onnx_path)

        loaded = load_onnx_model(onnx_path)
        selected = select_model_inputs_outputs(loaded, "variable")
        assert selected.graph is not None

        run_loaded = self.get_model(loaded)
        run_selected = self.get_model(selected)
        X32 = X.astype(numpy.float32)
        out_loaded = run_loaded(X32)
        out_selected = run_selected(X32)
        assert out_loaded.shape == (4, 2)
        assert out_selected.shape == (4, 2)
Example 6
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_objectmapper(self):
        # Each name looked up on the ModelFrame's ``preprocessing`` accessor
        # must be the very same object as the attribute of that name on
        # ``pp``. Names are checked in the listed order.
        df = pdml.ModelFrame([])
        mapped_names = (
            'Binarizer',
            'FunctionTransformer',
            'Imputer',
            'KernelCenterer',
            'LabelBinarizer',
            'LabelEncoder',
            'MultiLabelBinarizer',
            'MaxAbsScaler',
            'MinMaxScaler',
            'Normalizer',
            'OneHotEncoder',
            'PolynomialFeatures',
            'RobustScaler',
            'StandardScaler',
        )
        for name in mapped_names:
            self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
Example 7
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_transform_1d_frame_int(self):
        # Compare accessor-built estimators with the plain ``pp`` ones on a
        # single-column integer ModelFrame, for transform and fit_transform.
        values = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
        labels = pd.Index('a b c d e f g h i'.split(' '))
        df = pdml.ModelFrame(values, index=labels, columns=['X'])
        self.assertEqual(len(df.columns), 1)

        # the reference side is given the data as a 2-D (n, 1) array
        values_2d = values.reshape(-1, 1)

        models = ['Binarizer', 'Imputer', 'StandardScaler']
        if not pd.compat.PY3:
            # MinMaxScaler raises TypeError in ufunc on Python 3, so it is
            # only exercised otherwise
            models.append('MinMaxScaler')

        for name in models:
            # fresh estimator instances are built for each of the two checks
            frame_est = getattr(df.preprocessing, name)()
            plain_est = getattr(pp, name)()
            self._assert_transform(df, values_2d, frame_est, plain_est)

            frame_est = getattr(df.preprocessing, name)()
            plain_est = getattr(pp, name)()
            self._assert_fit_transform(df, values_2d, frame_est, plain_est)
Example 8
Project: atap   Author: foxbook   File: vectorization.py    License: Apache License 2.0 5 votes vote down vote up
def sklearn_one_hot_vectorize(corpus):
    """Print the vector width before and after binarizing token counts.

    The corpus is vectorized with ``CountVectorizer`` and the counts are then
    passed through a default ``Binarizer``. The length of the first row is
    printed after each step. Nothing is returned.

    :param corpus: input passed straight to ``CountVectorizer.fit_transform``
                   (presumably an iterable of raw text documents -- confirm
                   against the caller)
    """
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.preprocessing import Binarizer

    counts = CountVectorizer().fit_transform(corpus)

    # Densify once and reuse the array; the original converted the sparse
    # matrix to a dense array twice.
    dense_counts = counts.toarray()
    print(len(dense_counts[0]))

    onehot = Binarizer().fit_transform(dense_counts)
    print(len(onehot[0]))
Example 9
Project: sklearn-onnx   Author: onnx   File: _supported_operators.py    License: MIT License 5 votes vote down vote up
def _get_sklearn_operator_name(model_type):
    """
    Look up the operator name for the input argument in
    ``sklearn_operator_name_map``.

    :param model_type:  A scikit-learn object (e.g., SGDClassifier
                        and Binarizer)
    :return: A string which stands for the type of the input model in
             our conversion framework, or ``None`` when the map has no
             entry for ``model_type``
    """
    # A missing entry means no proper operator name was found, i.e. it is a
    # local operator; the caller is told so with None.
    return sklearn_operator_name_map.get(model_type)
Example 10
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_transform_series_int(self):
        # Compare accessor-built estimators with the plain ``pp`` ones on an
        # integer ModelSeries: first fit + transform, then fit_transform.
        values = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
        s = pdml.ModelSeries(values, index='a b c d e f g h i'.split(' '))

        # the reference side is given the data as a 2-D (n, 1) array
        values_2d = values.reshape(-1, 1)

        models = ['Binarizer', 'Imputer', 'StandardScaler']
        if not pd.compat.PY3:
            # MinMaxScaler raises TypeError in ufunc on Python 3, so it is
            # only exercised otherwise
            models.append('MinMaxScaler')

        def check(result, expected):
            # the series result must stay a ModelSeries and match the
            # flattened reference output
            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

        for name in models:
            series_est = getattr(s.preprocessing, name)()
            plain_est = getattr(pp, name)()
            s.fit(series_est)
            plain_est.fit(values_2d)
            check(s.transform(series_est),
                  plain_est.transform(values_2d).flatten())

            # fresh, unfitted instances for the combined call
            series_est = getattr(s.preprocessing, name)()
            plain_est = getattr(pp, name)()
            check(s.fit_transform(series_est),
                  plain_est.fit_transform(values_2d).flatten())
Example 11
Project: visual-search   Author: GYXie   File: myalexnet_feature.py    License: MIT License 4 votes vote down vote up
def main():
    """Run every input image through the model and save the ``fc6`` outputs.

    Reads image names with ``load_image_names(args.input_data_dir)``, writes
    them one per line to ``args.output_image_name_file``, evaluates the
    ``fc6`` tensor returned by ``initModel()`` batch by batch, and saves the
    stacked result to ``args.output_feature_file`` with ``np.save``. The
    elapsed time since batching started is printed at the end.
    """
    x, fc6 = initModel()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    try:
        sess.run(init)
        img_names = load_image_names(args.input_data_dir)

        with open(args.output_image_name_file, 'w') as img_names_file:
            for img_name in img_names:
                img_names_file.write(img_name + '\n')

        t = time.time()
        # There are too many images to process at once, so work in batches.
        batch_size = 100
        features = []

        # np.save emits binary data, so the target must be opened in binary
        # mode; a text-mode handle fails on Python 3.
        with open(args.output_feature_file, 'wb') as output_file:
            batch_starts = range(0, len(img_names), batch_size)
            for i, start in enumerate(batch_starts):
                print('batch: %d' % i)
                # Slicing clamps at the end of the list, so the final
                # (possibly shorter) batch needs no special case.
                img_names_batch = img_names[start:start + batch_size]
                img_batch = load_images(img_names_batch)
                output = sess.run(fc6, feed_dict={x: img_batch})
                features.append(output)
            features = np.vstack(features)
            # NOTE: the features are stored as-is; a preprocessing.Binarizer
            # step that used to sit here is deliberately disabled.
            np.save(output_file, features)

        print(time.time() - t)
    finally:
        # Release the session's resources even if extraction fails.
        sess.close()
Example 12
Project: visual-search   Author: GYXie   File: visual_search.py    License: MIT License 4 votes vote down vote up
def main():
    """Search the stored features for matches of a query image and plot them.

    The query image and four derived versions (watermark, rotate, crop,
    mirror) are turned into binarized features, compared against the array
    loaded from ``fc6.npy`` using cosine distance, and the ten closest
    dataset images per version are drawn next to it in a 5 x 11 grid saved
    to ``args.img_file_path + '_search_result.jpg'``.
    """
    started = time.time()
    query_img = imread(args.img_file_path)
    variants = [
        query_img,
        watermark(query_img),
        rotate(query_img),
        crop(query_img),
        mirror(query_img),
    ]
    variants_norm = image_normalize(variants)
    dataset_features = np.load('fc6.npy')

    query_started = time.time()
    query_features = extract_feature(variants_norm)
    query_features = preprocessing.Binarizer().fit(query_features).transform(query_features)
    print(dataset_features)
    # cdist reference:
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html#scipy.spatial.distance.cdist
    dis = distance.cdist(dataset_features, query_features, 'cosine')
    print(dis.shape)
    # Sort by column, see
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html
    ranking = argsort(dis, axis=0)
    elapsed_query = time.time() - query_started
    print('query cost: %f, dataset: %d, query: %d' % (elapsed_query, len(dataset_features), len(variants)))
    img_names = load_image_names()
    fig, axes = plt.subplots(5, 11, figsize=(22, 10), subplot_kw={'xticks': [], 'yticks': []})
    fig.subplots_adjust(hspace=0.15, wspace=0.01, left=.02, right=.98, top=.92, bottom=.08)
    titles = ['original', 'watermark', 'rotate', 'crop', 'mirror']
    for row, variant in enumerate(variants):
        column_rank = ranking[:, row]
        top_names = []
        for hit in range(10):
            idx = column_rank[hit]
            top_names.append(img_names[idx])
            print(idx, dis[idx, row], img_names[idx])

        # first column shows the query version itself
        query_ax = axes[row, 0]
        query_ax.set_title(titles[row])
        query_ax.imshow(variant)
        # remaining columns show the ten closest dataset images
        for rank, name in enumerate(top_names):
            ax = axes[row, rank + 1]
            ax.imshow(imread(name))
            ax.set_title('%d : %f' % (rank + 1, dis[column_rank[rank], row]))

    save_path = args.img_file_path + '_search_result.jpg'
    plt.savefig(save_path)
    print(time.time() - started)
    # To preview the result on macOS:
    # os.system('open -a Preview.app -F ' + save_path)