Python sklearn.preprocessing.LabelBinarizer() Examples

The following 30 code examples show how to use sklearn.preprocessing.LabelBinarizer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.

Example 1
Project: MaliciousMacroBot   Author: egaus   File: mmbot.py    License: MIT License 6 votes vote down vote up
def mmb_evaluate_model(self):
        """
        Cross-validate a random forest on the malicious / benign training split.

        Stores the feature matrix and label array on ``self.clf_X`` and
        ``self.clf_y``, splits off 20% of the rows (``random_state=0``), binarizes
        the remaining training labels and scores a 100-tree
        ``RandomForestClassifier`` with 5-fold cross validation.

        :return: dict with keys 'accuracy', 'f1', 'precision' and 'recall', each
                 holding the ``cross_val_score`` result for that metric
                 ('f1' uses the 'f1_macro' scorer)
        """
        feature_columns = self.features['predictive_features']
        self.clf_X = self.modeldata[feature_columns].values
        self.clf_y = np.array(self.modeldata['label'])

        # Only the training part of the split is used below.
        X_train, _, y_train, _ = train_test_split(
            self.clf_X, self.clf_y, test_size=0.2, random_state=0)

        # Keep the first column of the binarized labels as a flat vector.
        binarized = LabelBinarizer().fit_transform(y_train)
        y_train = np.array([row[0] for row in binarized])

        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
        eval_cls.fit(X_train, y_train)

        # One cross-validation run per scorer, in the order recall, precision,
        # accuracy, f1_macro.
        scores = {}
        for scorer in ('recall', 'precision', 'accuracy', 'f1_macro'):
            scores[scorer] = cross_val_score(
                eval_cls, X_train, y_train, cv=5, scoring=scorer)

        return {'accuracy': scores['accuracy'], 'f1': scores['f1_macro'],
                'precision': scores['precision'], 'recall': scores['recall']}
Example 2
Project: recordlinkage   Author: J535D165   File: test_classify.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_sklearn_labelbin(self):
        """Check that ``classes_`` assigned by hand is still in place after ``transform``."""
        m_values = np.array([1.0, .81, .85, .81, .85, .81])
        u_values = np.array([1.0, .23, .50, .23, .30, 0.13])

        # Build the training vectors.
        X_train, true_links = binary_vectors(
            1000, 500, m=m_values, u=u_values,
            random_state=535, return_links=True)

        first_column = X_train.iloc[:, 0]
        second_column = X_train.iloc[:, 1]

        lb = LabelBinarizer()
        lb.fit(first_column)
        # Fitting on the first column is expected to find exactly one class.
        assert len(lb.classes_) == 1

        # Overwrite the fitted classes manually.
        lb.classes_ = np.array([0, 1])
        assert len(lb.classes_) == 2

        # Transforming another column must leave the manual classes untouched.
        lb.transform(second_column)
        assert len(lb.classes_) == 2
Example 3
Project: SpectralMachine   Author: feranick   File: SpectraLearnPredict.py    License: GNU General Public License v3.0 6 votes vote down vote up
def formatClass(rootFile, Cl):
    """Binarize the class labels and save a histogram of the class values.

    The histogram of ``Cl`` (converted to floats) is written to
    ``rootFile + '_ClassDistrib.png'`` and additionally shown on screen when
    ``tfDef.plotClassDistribTF`` equals ``True``.

    :param rootFile: prefix of the PNG file name
    :param Cl: iterable of class values convertible to ``float``
    :return: output of ``LabelBinarizer().fit_transform(Cl)``
    """
    import sklearn.preprocessing as pp
    print('==========================================================================\n')
    print(' Running basic TensorFlow. Creating class data in binary form...')
    binarized = pp.LabelBinarizer().fit_transform(Cl)

    import matplotlib.pyplot as plt
    # Convert once; the same values feed both the data and the bin edges.
    class_values = [float(x) for x in Cl]
    plt.hist(class_values, bins=np.unique(class_values), edgecolor="black")
    plt.xlabel('Class')
    plt.ylabel('Occurrances')
    plt.title('Class distibution')
    png_path = rootFile + '_ClassDistrib.png'
    plt.savefig(png_path, dpi = 160, format = 'png')  # Save plot
    if tfDef.plotClassDistribTF == True:
        print(' Plotting Class distibution \n')
        plt.show()

    return binarized

#******************************************************************************** 
Example 4
Project: muffnn   Author: civisanalytics   File: test_mlp_classifier.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_cross_val_predict():
    """MLPClassifier must work inside ``cross_val_predict`` on a multiclass target."""
    features, targets = load_iris(return_X_y=True)
    targets = LabelBinarizer().fit_transform(targets)
    features = StandardScaler().fit_transform(features)

    estimator = MLPClassifier(
        n_epochs=10,
        solver_kwargs={'learning_rate': 0.05},
        random_state=4567,
    )
    mlp = estimator.fit(features, targets)

    folds = KFold(n_splits=4, random_state=457, shuffle=True)
    out_of_sample = cross_val_predict(
        mlp, features, targets, cv=folds, method='predict_proba')

    # average=None yields one AUC per class; every one must clear the bar.
    per_class_auc = roc_auc_score(targets, out_of_sample, average=None)
    assert np.all(per_class_auc >= 0.96)
Example 5
Project: medical-entity-recognition   Author: baiyyang   File: crf_unit.py    License: Apache License 2.0 6 votes vote down vote up
def bio_classification_report(y_true, y_pred):
    """
    Classification report for BIOSE-encoded tag labels.

    The labels are binarized with a LabelBinarizer fitted on ``y_true``. Every
    fitted class is reported, 'O' included. Tags are ordered by the part after
    the first '-' and then by the prefix, so tags that share a suffix are
    listed next to each other.
    :param y_true: gold labels, handed to ``LabelBinarizer.fit_transform``
    :param y_pred: predicted labels, handed to ``LabelBinarizer.transform``
    :return: the value returned by ``classification_report``
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)

    # 'B-X'.split('-', 1)[::-1] == ['X', 'B']: sort by suffix first, prefix second.
    tagset = sorted(set(lb.classes_), key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {
        cls: idx for idx, cls in enumerate(lb.classes_)
    }

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
Example 6
Project: Jiayan   Author: jiaeyan   File: crf_sent_tagger.py    License: MIT License 6 votes vote down vote up
def eval(self, test_x, test_y, crf_model):
        """Tag ``test_x`` with the CRF model at ``crf_model`` and print a report.

        :param test_x: iterable of feature lists, one per sequence
        :param test_y: iterable of gold tag sequences
        :param crf_model: value passed to ``pycrfsuite.Tagger.open``
        """
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model)

        y_pred = [tagger.tag(feat_list) for feat_list in test_x]

        # Flatten the per-sequence tags into one token-level list each.
        flat_true = list(chain.from_iterable(test_y))
        flat_pred = list(chain.from_iterable(y_pred))

        lb = LabelBinarizer()
        y_true_all = lb.fit_transform(flat_true)
        y_pred_all = lb.transform(flat_pred)

        tagset = sorted(set(lb.classes_))
        column_of = {cls: idx for idx, cls in enumerate(lb.classes_)}
        label_columns = [column_of[cls] for cls in tagset]

        report = classification_report(
            y_true_all,
            y_pred_all,
            labels=label_columns,
            target_names=tagset,
            digits=5
        )
        print(report)
Example 7
Project: Jiayan   Author: jiaeyan   File: crf_pos_tagger.py    License: MIT License 6 votes vote down vote up
def eval(self, test_x, test_y, crf_model):
        """Print a token-level classification report for a stored CRF model.

        Each feature list in ``test_x`` is tagged with the model opened from
        ``crf_model``; the predictions and ``test_y`` are flattened, binarized
        and passed to ``classification_report`` (5 digits).
        """
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model)

        predicted_sequences = []
        for features in test_x:
            predicted_sequences.append(tagger.tag(features))

        gold_tokens = list(chain.from_iterable(test_y))
        binarizer = LabelBinarizer()
        y_true_all = binarizer.fit_transform(gold_tokens)
        predicted_tokens = list(chain.from_iterable(predicted_sequences))
        y_pred_all = binarizer.transform(predicted_tokens)

        # Map each class to its column in the binarized matrices.
        position = {}
        for idx, cls in enumerate(binarizer.classes_):
            position[cls] = idx
        tagset = sorted(set(binarizer.classes_))

        print(classification_report(
            y_true_all,
            y_pred_all,
            labels=[position[tag] for tag in tagset],
            target_names=tagset,
            digits=5
        ))
Example 8
Project: qiskit-aqua   Author: Qiskit   File: one_against_rest.py    License: Apache License 2.0 6 votes vote down vote up
def train(self, x, y):
        """
        Fit one estimator per column of the binarized labels.

        The labels are binarized with ``LabelBinarizer(neg_label=0)``; the fitted
        binarizer, its classes and the list of fitted estimators are stored on
        ``self``.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels
        Raises:
            Exception: a binarized label column holds a single distinct value,
                        i.e. all data points are assigned to the same class
        """
        binarizer = LabelBinarizer(neg_label=0)
        self.label_binarizer_ = binarizer
        Y = binarizer.fit_transform(y)
        self.classes = binarizer.classes_
        self.estimators = []
        for raw_column in Y.T:
            column = np.ravel(raw_column)
            if len(np.unique(column)) == 1:
                raise Exception("given all data points are assigned to the same class, "
                                "the prediction would be boring.")
            estimator = self.estimator_cls(*self.params)
            estimator.fit(x, column)
            self.estimators.append(estimator)
Example 9
Project: OpenCV-3-x-with-Python-By-Example   Author: PacktPublishing   File: training.py    License: MIT License 6 votes vote down vote up
def __init__(self, feature_vector_size, label_words):
        """Create the MLP and the label binarizer.

        :param feature_vector_size: number of centroids used to build the
            feature vectors; becomes the size of the input layer
        :param label_words: labels to recognize; its length becomes the size of
            the output layer and the binarizer is fitted on it
        """
        self.ann = cv2.ml.ANN_MLP_create()
        # Number of centroids used to build the feature vectors
        input_size = feature_vector_size
        # Number of models to recognize
        output_size = len(label_words)
        # Applying Heaton rules: hidden = 2/3 * input + output. Integer
        # arithmetic keeps the result identical on Python 2 (where ``2 / 3``
        # evaluates to 0) and avoids relying on float truncation.
        hidden_size = (2 * input_size) // 3 + output_size
        # int32 rather than uint8: a uint8 array silently wraps any layer size
        # above 255.
        nn_config = np.array([input_size, hidden_size, output_size], dtype=np.int32)
        self.label_words = label_words
        self.ann.setLayerSizes(nn_config)
        # Symmetrical Sigmoid as activation function
        self.ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)
        # Map models as tuples of probabilities
        self.le = preprocessing.LabelBinarizer()
        self.le.fit(label_words)  # Label words are ['dress', 'footwear', 'backpack']
Example 10
def test_conversion_with_sparse_y(self):
        """Converting a model fitted on sparse binarized labels must raise ValueError."""
        from sklearn.model_selection import train_test_split
        from sklearn import preprocessing

        # Only the training part of the split is needed.
        X_train, _, y_train, _ = train_test_split(
            self.iris_X, self.iris_y, test_size=0.2, train_size=0.8
        )

        binarizer = preprocessing.LabelBinarizer(sparse_output=True)
        sparse_y = binarizer.fit_transform(y_train)

        knn = KNeighborsClassifier(algorithm="brute")
        knn.fit(X_train, sparse_y)

        with self.assertRaises(ValueError):
            sklearn.convert(knn)
Example 11
Project: plume   Author: WiseDoge   File: naive_bayes.py    License: MIT License 6 votes vote down vote up
def fit(self, X, y):
        """
        Accumulate per-class feature totals and derive smoothed log-probabilities.

        Sets ``classes``, ``class_count``, ``feature_count`` and
        ``feature_log_prob`` on the instance; ``self.alpha`` is added to every
        feature total before normalizing.

        :param X: shape = [n_samples, n_features]
        :param y: shape = [n_samples]
        :return: self
        """
        labelbin = LabelBinarizer()
        Y = labelbin.fit_transform(y)
        self.classes = labelbin.classes_
        # LabelBinarizer emits a single column for a two-class target. Expand it
        # to one indicator column per class so that row i of the count matrices
        # corresponds to self.classes[i].
        if Y.shape[1] == 1 and len(self.classes) == 2:
            Y = np.concatenate((1 - Y, Y), axis=1)
        self.class_count = np.zeros(Y.shape[1], dtype=np.float64)
        self.feature_count = np.zeros((Y.shape[1], X.shape[1]),
                                      dtype=np.float64)

        self.feature_count += Y.T @ X
        self.class_count += Y.sum(axis=0)
        smoothed_fc = self.feature_count + self.alpha
        smoothed_cc = smoothed_fc.sum(axis=1)

        self.feature_log_prob = (np.log(smoothed_fc) -
                                 np.log(smoothed_cc.reshape(-1, 1)))
        # The docstring has always promised ``self``; returning it also allows
        # call chaining.
        return self
Example 12
Project: plastering   Author: plastering   File: char2ir_gpu.py    License: MIT License 6 votes vote down vote up
def encode_labels(self, label_dict, srcids):
        """Fit ``self.le`` on all labels and return the padded encodings.

        The binarizer is fitted on 'O', on the 'B_'/'I_' variants of the tags in
        ``brick/tags.json`` when ``self.use_brick_flag`` is set, and on every
        label found in ``label_dict`` for ``srcids``. Each label sequence is then
        encoded and zero-padded to ``self.max_len`` rows; an empty sequence
        becomes an all-zero block.

        :param label_dict: ``label_dict[srcid][metadata_type]`` is a label sequence
        :param srcids: ids to encode; metadata types are taken from
            ``self.sentence_dict[srcid]``
        :return: ``np.stack`` of one padded block per (srcid, metadata_type)
        """
        flat_labels = ['O']
        if self.use_brick_flag:
            with open('brick/tags.json', 'r') as fp:
                brick_tags = json.load(fp)
            flat_labels += ['B_' + tag for tag in brick_tags] + \
                           ['I_' + tag for tag in brick_tags]
        flat_labels += reduce(adder, [reduce(adder, label_dict[srcid].values()) for srcid in srcids])
        self.le = LabelBinarizer().fit(flat_labels)
        # Width of one encoded row, measured on a real transform ('O' is always
        # fitted). Previously the empty-sequence branch read the width from the
        # prior iteration's array, which is unbound on the first iteration.
        n_columns = self.le.transform(['O']).shape[1]
        stack = []
        for srcid in srcids:
            for metadata_type in self.sentence_dict[srcid].keys():
                labels = label_dict[srcid][metadata_type]
                if len(labels) == 0:
                    encoded = np.zeros((self.max_len, n_columns))
                else:
                    encoded = self.le.transform(labels)
                    # Pad with zero rows up to max_len.
                    encoded = np.vstack([encoded, np.zeros(
                                         (self.max_len - encoded.shape[0],
                                          encoded.shape[1]))])
                stack.append(encoded)
        return np.stack(stack)
Example 13
Project: production-tools   Author: thuijskens   File: train_model.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def get_mnist_data():
    """Load the data returned by ``load_digits`` with binarized labels.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        The ``data`` attribute of the loaded data set.

    y : array-like
        The ``target`` attribute after ``LabelBinarizer().fit_transform``.
    """
    dataset = load_digits()
    binarized_target = LabelBinarizer().fit_transform(dataset.target)
    return dataset.data, binarized_target
Example 14
Project: SVM-CNN   Author: dlmacedo   File: elm.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, n_hidden=20, alpha=0.5, rbf_width=1.0,
                 activation_func='tanh', activation_args=None,
                 user_components=None, regressor=None,
                 binarizer=None,
                 random_state=None):
        """Forward the network parameters to the parent and store the binarizer.

        :param binarizer: label binarizer to use. ``None`` (the default) creates
            a fresh ``LabelBinarizer(neg_label=-1, pos_label=1)`` per instance.
            The former default was one instance built at definition time and
            shared by every classifier, and it was built with positional
            arguments.

        All other parameters are passed unchanged to the parent ``__init__``.
        """
        super(ELMClassifier, self).__init__(n_hidden=n_hidden,
                                            alpha=alpha,
                                            random_state=random_state,
                                            activation_func=activation_func,
                                            activation_args=activation_args,
                                            user_components=user_components,
                                            rbf_width=rbf_width,
                                            regressor=regressor)

        self.classes_ = None
        if binarizer is None:
            binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
        self.binarizer = binarizer
Example 15
Project: sklearn2pmml   Author: jpmml   File: xgboost.py    License: GNU Affero General Public License v3.0 6 votes vote down vote up
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware = True):
	"""Construct a DataFrameMapper for feeding complex data into an XGBModel.

	Parameters
	----------

	dtypes: object whose ``items()`` yields (column, dtype) pairs

	missing_value_aware: boolean
		If true, categorical columns use PMMLLabelBinarizer, otherwise LabelBinarizer.

	Returns
	-------
	DataFrameMapper
		Categorical columns get a sparse-output binarizer, all others ``None``.

	"""
	binarizer_cls = PMMLLabelBinarizer if missing_value_aware else LabelBinarizer
	# A separate binarizer instance is created for every categorical column.
	features = [
		([column], binarizer_cls(sparse_output = True) if _is_categorical(dtype) else None)
		for column, dtype in dtypes.items()
	]
	return DataFrameMapper(features)
Example 16
Project: polylearn   Author: scikit-learn-contrib   File: base.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _check_X_y(self, X, y):
        """Validate ``X`` and a binary ``y`` and return ``y`` encoded as -1 / +1.

        The fitted binarizer is stored as ``self.label_binarizer_``.

        Raises TypeError when ``y`` has two or more columns or when
        ``type_of_target(y)`` is not 'binary'.
        """
        # helpful error message for older sklearn
        # NOTE(review): the original comment said "sklearn < 1.17", presumably
        # meaning 0.17 - confirm.
        several_columns = (
            hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2
        )
        if several_columns or type_of_target(y) != 'binary':
            raise TypeError("Only binary targets supported. For training "
                            "multiclass or multilabel models, you may use the "
                            "OneVsRest or OneVsAll metaestimators in "
                            "scikit-learn.")

        X, checked_y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                                 multi_output=False)

        binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        self.label_binarizer_ = binarizer
        signed = binarizer.fit_transform(checked_y)
        return X, signed.ravel().astype(np.double)
Example 17
Project: sklearn-onnx   Author: onnx   File: test_sklearn_label_binariser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_label_binariser_default(self):
        """Convert a default LabelBinarizer to ONNX and dump model and input."""
        labels = np.array([1, 2, 6, 4, 2])
        binarizer = LabelBinarizer().fit(labels)

        input_types = [("input", Int64TensorType([None]))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn label binariser", input_types)
        self.assertTrue(onnx_model is not None)

        # Condition string handed to dump_data_and_model as ``allow_failure``.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            labels.astype(np.int64),
            binarizer,
            onnx_model,
            basename="SklearnLabelBinariserDefault",
            allow_failure=failure_condition,
        )
Example 18
Project: sklearn-onnx   Author: onnx   File: test_sklearn_label_binariser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_label_binariser_neg_label(self):
        """Convert a LabelBinarizer with ``neg_label=-101`` to ONNX and dump it."""
        labels = np.array([1, 2, 6, 4, 2])
        binarizer = LabelBinarizer(neg_label=-101).fit(labels)

        input_types = [("input", Int64TensorType([None]))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn label binariser", input_types)
        self.assertTrue(onnx_model is not None)

        # Condition string handed to dump_data_and_model as ``allow_failure``.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            labels.astype(np.int64),
            binarizer,
            onnx_model,
            basename="SklearnLabelBinariserNegLabel",
            allow_failure=failure_condition,
        )
Example 19
Project: sklearn-onnx   Author: onnx   File: test_sklearn_label_binariser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_label_binariser_pos_label(self):
        """Convert a LabelBinarizer with ``pos_label=123`` to ONNX and dump it."""
        labels = np.array([1, 2, 6, 4, 2])
        binarizer = LabelBinarizer(pos_label=123).fit(labels)

        input_types = [("input", Int64TensorType([None]))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn label binariser", input_types)
        self.assertTrue(onnx_model is not None)

        # Condition string handed to dump_data_and_model as ``allow_failure``.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            labels.astype(np.int64),
            binarizer,
            onnx_model,
            basename="SklearnLabelBinariserPosLabel",
            allow_failure=failure_condition,
        )
Example 20
Project: sklearn-onnx   Author: onnx   File: test_sklearn_label_binariser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_label_binariser_neg_pos_label(self):
        """Convert a LabelBinarizer with ``neg_label=10, pos_label=20`` to ONNX and dump it."""
        labels = np.array([1, 2, 6, 4, 2])
        binarizer = LabelBinarizer(neg_label=10, pos_label=20).fit(labels)

        input_types = [("input", Int64TensorType([None]))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn label binariser", input_types)
        self.assertTrue(onnx_model is not None)

        # Condition string handed to dump_data_and_model as ``allow_failure``.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            labels.astype(np.int64),
            binarizer,
            onnx_model,
            basename="SklearnLabelBinariserNegPosLabel",
            allow_failure=failure_condition,
        )
Example 21
Project: sklearn-onnx   Author: onnx   File: test_sklearn_label_binariser_converter.py    License: MIT License 6 votes vote down vote up
def test_model_label_binariser_binary_labels(self):
        """Convert a LabelBinarizer fitted on 0/1 labels to ONNX and dump it."""
        labels = np.array([1, 0, 0, 0, 1])
        binarizer = LabelBinarizer().fit(labels)

        input_types = [("input", Int64TensorType([None]))]
        onnx_model = convert_sklearn(
            binarizer, "scikit-learn label binariser", input_types)
        self.assertTrue(onnx_model is not None)

        # Condition string handed to dump_data_and_model as ``allow_failure``.
        failure_condition = (
            "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            labels.astype(np.int64),
            binarizer,
            onnx_model,
            basename="SklearnLabelBinariserBinaryLabels",
            allow_failure=failure_condition,
        )
Example 22
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_objectmapper(self):
        """Every ``df.preprocessing`` attribute must be the same-named ``pp`` object."""
        df = pdml.ModelFrame([])
        mapped_names = (
            'Binarizer',
            'FunctionTransformer',
            'Imputer',
            'KernelCenterer',
            'LabelBinarizer',
            'LabelEncoder',
            'MultiLabelBinarizer',
            'MaxAbsScaler',
            'MinMaxScaler',
            'Normalizer',
            'OneHotEncoder',
            'PolynomialFeatures',
            'RobustScaler',
            'StandardScaler',
        )
        # Checked in the listed order; the first mismatch fails the test.
        for name in mapped_names:
            self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
Example 23
Project: pandas-ml   Author: pandas-ml   File: test_preprocessing.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_LabelBinarizer(self):
        """Exercise fit/transform, fit_transform and inverse_transform on a ModelSeries."""
        arr = np.array([1, 2, 3, 2])
        s = pdml.ModelSeries(arr, index=['a', 'b', 'c', 'd'])
        expected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]])

        # Separate fit and transform calls.
        binarizer = s.pp.LabelBinarizer()
        s.fit(binarizer)
        transformed = s.transform(binarizer)

        self.assertIsInstance(transformed, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(transformed.values, expected)
        tm.assert_index_equal(transformed.index, s.index)

        # Combined fit_transform on a fresh binarizer.
        binarizer = s.pp.LabelBinarizer()
        combined = s.fit_transform(binarizer)

        self.assertIsInstance(combined, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(combined.values, expected)

        # Round trip back to the original values.
        inversed = combined.inverse_transform(binarizer)
        self.assertIsInstance(inversed, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(inversed.values.flatten(), arr)
        tm.assert_index_equal(combined.index, s.index)
Example 24
Project: mindmeld   Author: cisco   File: lstm.py    License: Apache License 2.0 6 votes vote down vote up
def setup_model(self, config):
        """Apply ``config`` and create the encoders and graph used by the model.

        ``config.params`` is applied through ``set_params`` first, because the
        embedding encoders below are built from attributes read off ``self``.
        The character encoder is only created when ``self.use_char_embeddings``
        is truthy.
        """
        self.set_params(**config.params)

        # Two separate binarizers, stored as label_encoder and gaz_encoder.
        self.label_encoder = LabelBinarizer()
        self.gaz_encoder = LabelBinarizer()

        self.graph = tf.Graph()
        self.saver = None

        self.example_type = config.example_type
        self.features = config.features

        word_embedding = WordSequenceEmbedding(
            self.padding_length,
            self.token_embedding_dimension,
            self.token_pretrained_embedding_filepath,
        )
        self.query_encoder = word_embedding

        if not self.use_char_embeddings:
            return

        self.char_encoder = CharacterSequenceEmbedding(
            self.padding_length,
            self.character_embedding_dimension,
            self.max_char_per_word,
        )
Example 25
Project: mindmeld   Author: cisco   File: lstm.py    License: Apache License 2.0 6 votes vote down vote up
def _gaz_transform(self, list_of_tokens_to_transform):
        """Encode tokens with ``self.gaz_encoder``, widening the 2-class case.

        When the encoder holds exactly two classes its output ``output`` is
        replaced by ``hstack((1 - output, output))``, so a two-class system is
        encoded as [1,0] / [0,1] instead of [0] / [1]. For any other number of
        classes the encoder output is returned unchanged.

        Args:
            list_of_tokens_to_transform (list): A sequence of class labels

        Returns:
            (array): corrected encoding from the binarizer
        """
        encoder = self.gaz_encoder
        encoded = encoder.transform(list_of_tokens_to_transform)
        if len(encoder.classes_) != 2:
            return encoded
        # Prepend the complement column so each row has one entry per class.
        return np.hstack((1 - encoded, encoded))
Example 26
Project: stacked_generalization   Author: fukatani   File: stacking.py    License: Apache License 2.0 6 votes vote down vote up
def _get_child_predict(self, clf, X, index=None):
        """Return the stage-0 output of ``clf`` for ``X`` as stacking features.

        - With ``self.stack_by_proba`` set and a ``predict_proba`` method on
          ``clf``: the class probabilities without their first column. They come
          from ``util.saving_predict_proba`` when ``self.save_stage0`` is set and
          ``index`` is given, otherwise from ``clf.predict_proba``.
        - Otherwise, if ``clf`` has ``predict``: the predictions, binarized for
          a ``ClassifierMixin`` and reshaped to a single column for anything else.
        - Otherwise: ``clf.fit_transform(X)``.
        """
        if self.stack_by_proba and hasattr(clf, 'predict_proba'):
            if self.save_stage0 and index is not None:
                proba = util.saving_predict_proba(clf, X, index)
            else:
                proba = clf.predict_proba(X)
            return proba[:, 1:]

        if not hasattr(clf, 'predict'):
            return clf.fit_transform(X)

        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            # fit_transform fits by itself; the separate fit() call that used to
            # precede it did the same work twice.
            return LabelBinarizer().fit_transform(predict_result)
        return predict_result.reshape((predict_result.size, 1))
Example 27
Project: recordlinkage   Author: J535D165   File: nb_sklearn.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _fit_data(self, X):
        """Fit one LabelBinarizer per column and return the transformed data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_transformed : array-like
            Result of ``self._transform_data`` on ``X`` (thresholded first when
            ``self.binarize`` is not None).

        """
        threshold = self.binarize
        if threshold is not None:
            X = binarize(X, threshold=threshold)

        n_columns = X.shape[1]
        for column in range(n_columns):
            column_binarizer = LabelBinarizer()

            # NOTE(review): the fit() call below presumably recomputes classes_,
            # which would discard this preset - confirm the intent.
            if threshold:
                column_binarizer.classes_ = np.array([0, 1])

            column_binarizer.fit(X[:, column])

            # Fitted binarizers are appended to the list kept on the instance.
            self._binarizers.append(column_binarizer)

        return self._transform_data(X)
Example 28
Project: recordlinkage   Author: J535D165   File: test_classify.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_sklearn_preinit(self):
        """A LabelBinarizer with hand-set ``classes_`` must transform without fitting."""
        m_values = np.array([1.0, .81, .85, .81, .85, .81])
        u_values = np.array([1.0, .23, .50, .23, .30, 0.13])

        # Build the training vectors.
        X_train, true_links = binary_vectors(
            1000, 500, m=m_values, u=u_values,
            random_state=535, return_links=True)

        # No fit() call: the classes are assigned directly.
        lb = LabelBinarizer()
        lb.classes_ = np.array([0, 1])

        second_column = X_train.iloc[:, 1]
        lb.transform(second_column)
        assert len(lb.classes_) == 2
Example 29
Project: Jtyoui   Author: jtyoui   File: neighbor.py    License: MIT License 5 votes vote down vote up
def knn_class_fit(train, label):
    """Train the data model.

    The labels are binarized, flattened with ``np.ravel`` and used to fit a
    default ``KNeighborsClassifier`` on ``train``.

    NOTE(review): with more than two classes the binarized labels presumably
    have one column per class, so the flattened vector would no longer match
    ``train`` in length - confirm this is only used for binary labels.

    :return: tuple of (fitted classifier, fitted LabelBinarizer)
    """
    binary = LabelBinarizer()  # binarization
    flat_labels = np.ravel(binary.fit_transform(label))
    clf = KNeighborsClassifier()
    clf.fit(train, flat_labels)
    return clf, binary
Example 30
Project: kaggle-code   Author: CNuge   File: clean_to_np_matrix.py    License: MIT License 5 votes vote down vote up
def binarize_col(train, test, col):
	"""Binarize column ``col`` of ``train`` and ``test`` with one shared encoder.

	The encoder is fitted on ``train[col]`` only and then applied to
	``test[col]``.

	:return: tuple of (encoded train column, encoded test column)
	"""
	binarizer = LabelBinarizer()
	train_encoded = binarizer.fit_transform(train[col])
	test_encoded = binarizer.transform(test[col])
	return train_encoded, test_encoded