Python sklearn.preprocessing.Binarizer() Examples

The following are 12 code examples of sklearn.preprocessing.Binarizer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing , or try the search function .
Example #1
Source File: plot.py    From graspy with Apache License 2.0 6 votes vote down vote up
def _transform(arr, method):
    if method is not None:
        if method in ["log", "log10"]:
            # arr = np.log(arr, where=(arr > 0))
            # hacky, but np.log(arr, where=arr>0) is really buggy
            arr = arr.copy()
            if method == "log":
                arr[arr > 0] = np.log(arr[arr > 0])
            else:
                arr[arr > 0] = np.log10(arr[arr > 0])
        elif method in ["zero-boost", "simple-all", "simple-nonzero"]:
            arr = pass_to_ranks(arr, method=method)
        elif method == "binarize":
            transformer = Binarizer().fit(arr)
            arr = transformer.transform(arr)
        else:
            msg = "Transform must be one of {log, log10, binarize, zero-boost, simple-all, \
            simple-nonzero, not {}.".format(
                method
            )
            raise ValueError(msg)

    return arr 
Example #2
Source File: _supported_operators.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def get_model_alias(model_type):
    """
    Get alias model. Raise an exception if not found.

    :param model_type:  A scikit-learn object (e.g., SGDClassifier
                        and Binarizer)
    :return: A string which stands for the type of the input model in
             our conversion framework
    """
    res = _get_sklearn_operator_name(model_type)
    if res is None:
        raise RuntimeError("Unable to find alias for model '{}'. "
                           "The converter is likely missing."
                           "".format(type(model_type)))
    return res


# registered converters 
Example #3
Source File: test_sklearn_binarizer_converter.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_model_binarizer(self):
        data = np.array([[1., -1., 2.],
                         [2., 0., 0.],
                         [0., 1., -1.]], dtype=np.float32)
        model = Binarizer(threshold=0.5)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn binarizer",
            [("input", FloatTensorType(data.shape))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            data,
            model,
            model_onnx,
            basename="SklearnBinarizer-SkipDim1",
        ) 
Example #4
Source File: test_onnx_helper.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_onnx_helper_load_save(self):
        model = make_pipeline(StandardScaler(), Binarizer(threshold=0.5))
        X = numpy.array([[0.1, 1.1], [0.2, 2.2]])
        model.fit(X)
        model_onnx = convert_sklearn(model, "binarizer",
                                     [("input", FloatTensorType([None, 2]))])
        filename = "temp_onnx_helper_load_save.onnx"
        save_onnx_model(model_onnx, filename)
        model = load_onnx_model(filename)
        new_model = select_model_inputs_outputs(model, "variable")
        assert new_model.graph is not None

        tr1 = self.get_model(model)
        tr2 = self.get_model(new_model)
        X = X.astype(numpy.float32)
        X1 = tr1(X)
        X2 = tr2(X)
        assert X1.shape == (2, 2)
        assert X2.shape == (2, 2) 
Example #5
Source File: test_onnx_helper.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_onnx_helper_load_save_init(self):
        model = make_pipeline(
            Binarizer(),
            OneHotEncoder(sparse=False, handle_unknown='ignore'),
            StandardScaler())
        X = numpy.array([[0.1, 1.1], [0.2, 2.2], [0.4, 2.2], [0.2, 2.4]])
        model.fit(X)
        model_onnx = convert_sklearn(model, "pipe3",
                                     [("input", FloatTensorType([None, 2]))])
        filename = "temp_onnx_helper_load_save.onnx"
        save_onnx_model(model_onnx, filename)
        model = load_onnx_model(filename)
        new_model = select_model_inputs_outputs(model, "variable")
        assert new_model.graph is not None

        tr1 = self.get_model(model)
        tr2 = self.get_model(new_model)
        X = X.astype(numpy.float32)
        X1 = tr1(X)
        X2 = tr2(X)
        assert X1.shape == (4, 2)
        assert X2.shape == (4, 2) 
Example #6
Source File: test_preprocessing.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
        self.assertIs(df.preprocessing.FunctionTransformer,
                      pp.FunctionTransformer)
        self.assertIs(df.preprocessing.Imputer, pp.Imputer)
        self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
        self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
        self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
        self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
        self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
        self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
        self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
        self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
        self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
        self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
        self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler) 
Example #7
Source File: test_preprocessing.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_transform_1d_frame_int(self):
        arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
        idx = pd.Index('a b c d e f g h i'.split(' '))
        df = pdml.ModelFrame(arr, index=idx, columns=['X'])
        self.assertEqual(len(df.columns), 1)

        # reshape arr to 2d
        arr = arr.reshape(-1, 1)

        if pd.compat.PY3:
            models = ['Binarizer', 'Imputer', 'StandardScaler']
            # MinMaxScalar raises TypeError in ufunc
        else:
            models = ['Binarizer', 'Imputer', 'StandardScaler', 'MinMaxScaler']

        for model in models:
            mod1 = getattr(df.preprocessing, model)()
            mod2 = getattr(pp, model)()

            self._assert_transform(df, arr, mod1, mod2)

            mod1 = getattr(df.preprocessing, model)()
            mod2 = getattr(pp, model)()
            self._assert_fit_transform(df, arr, mod1, mod2) 
Example #8
Source File: vectorization.py    From atap with Apache License 2.0 5 votes vote down vote up
def sklearn_one_hot_vectorize(corpus):
    # The Sklearn one hot vectorize method

    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.preprocessing import Binarizer

    freq    = CountVectorizer()
    vectors = freq.fit_transform(corpus)

    print(len(vectors.toarray()[0]))

    onehot  = Binarizer()
    vectors = onehot.fit_transform(vectors.toarray())

    print(len(vectors[0])) 
Example #9
Source File: _supported_operators.py    From sklearn-onnx with MIT License 5 votes vote down vote up
def _get_sklearn_operator_name(model_type):
    """
    Get operator name of the input argument

    :param model_type:  A scikit-learn object (e.g., SGDClassifier
                        and Binarizer)
    :return: A string which stands for the type of the input model in
             our conversion framework
    """
    if model_type not in sklearn_operator_name_map:
        # "No proper operator name found, it means a local operator.
        return None
    return sklearn_operator_name_map[model_type] 
Example #10
Source File: test_preprocessing.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_transform_series_int(self):
        arr = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3])
        s = pdml.ModelSeries(arr, index='a b c d e f g h i'.split(' '))

        # reshape arr to 2d
        arr = arr.reshape(-1, 1)

        if pd.compat.PY3:
            models = ['Binarizer', 'Imputer', 'StandardScaler']
            # MinMaxScalar raises TypeError in ufunc
        else:
            models = ['Binarizer', 'Imputer', 'StandardScaler', 'MinMaxScaler']

        for model in models:
            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()
            s.fit(mod1)
            mod2.fit(arr)

            result = s.transform(mod1)
            expected = mod2.transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected)

            mod1 = getattr(s.preprocessing, model)()
            mod2 = getattr(pp, model)()

            result = s.fit_transform(mod1)
            expected = mod2.fit_transform(arr).flatten()

            self.assertIsInstance(result, pdml.ModelSeries)
            self.assert_numpy_array_almost_equal(result.values, expected) 
Example #11
Source File: myalexnet_feature.py    From visual-search with MIT License 4 votes vote down vote up
def main():
    x, fc6 = initModel()
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    img_names = load_image_names(args.input_data_dir)

    with open(args.output_image_name_file, 'w') as img_names_file:
        for img_name in img_names:
            img_names_file.write(img_name + '\n')

    t = time.time()
    # 图像太多了,必须分批次
    batch_size = 100
    features = []

    with open(args.output_feature_file, 'w') as output_file:
        for i in range(0, int(math.ceil(len(img_names) / (batch_size * 1.0)))):
            print('batch: %d' % i)
            if (i + 1) * batch_size < len(img_names):
                img_names_batch = img_names[i * batch_size:(i + 1) * batch_size]
            else:
                img_names_batch = img_names[i * batch_size:len(img_names)]
            img_batch = load_images(img_names_batch)
            output = sess.run(fc6, feed_dict={x: img_batch})
            features.append(output)
        features = np.vstack(features)
        # binarizer = preprocessing.Binarizer().fit(features)
        # features = binarizer.transform(features)
        np.save(output_file, features)

    # with open('fc6.npy', 'w') as output_file:
    #     for i in range(0, int(math.ceil(len(imgs) / (batch_size * 1.0)))):
    #         print('batch: %d' % i)
    #         if (i + 1) * batch_size < len(imgs):
    #             img_batch = imgs[i * batch_size:(i + 1) * batch_size]
    #         else:
    #             img_batch = imgs[i * batch_size: len(imgs)]
    #         output = sess.run(fc6, feed_dict={x: img_batch})
    #         features.append(output)
    #     features = np.vstack(features)
    #     np.save(output_file, features)

    print(time.time() - t) 
Example #12
Source File: visual_search.py    From visual-search with MIT License 4 votes vote down vote up
def main():
    t = time.time()
    img = imread(args.img_file_path)
    imgs = [img, watermark(img), rotate(img), crop(img), mirror(img)]
    imgs_norm = image_normalize(imgs)
    dataset_features = np.load('fc6.npy')

    query_start = time.time()
    query_features = extract_feature(imgs_norm)
    binarizer = preprocessing.Binarizer().fit(query_features)
    query_features = binarizer.transform(query_features)
    print(dataset_features)
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html#scipy.spatial.distance.cdist
    cosine = distance.cdist(dataset_features, query_features, 'cosine')
    print(cosine.shape)
    dis = cosine
    inds_all = argsort(dis, axis=0)  # 按列排序 https://docs.scipy.org/doc/numpy/reference/generated/numpy.argsort.html
    print('query cost: %f, dataset: %d, query: %d' % (time.time() - query_start, len(dataset_features), len(imgs)))
    img_names = load_image_names()
    fig, axes = plt.subplots(5, 11, figsize=(22, 10), subplot_kw={'xticks': [], 'yticks': []})
    fig.subplots_adjust(hspace=0.15, wspace=0.01, left=.02, right=.98, top=.92, bottom=.08)
    titles = ['original', 'watermark', 'rotate', 'crop', 'mirror']
    for i in range(len(imgs)):
        topK = []
        inds = inds_all[:, i]
        # print(inds)
        for k in range(10):
            topK.append(img_names[inds[k]])
            print(inds[k], dis[inds[k], i], img_names[inds[k]])

        original = axes[i, 0]
        original.set_title(titles[i])
        img = imgs[i]
        original.imshow(img)
        for j in range(10):
            ax = axes[i, j + 1]
            img = imread(topK[j])
            ax.imshow(img)
            title = '%d : %f' % (j + 1, dis[inds[j], i])
            ax.set_title(title)

    savePath = args.img_file_path + '_search_result.jpg'
    plt.savefig(savePath)
    print(time.time() - t)
    # os.system('open -a Preview.app -F ' + savePath)