Python sklearn.impute.SimpleImputer() Examples

The following are 29 code examples of sklearn.impute.SimpleImputer(), collected from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out the other functions and classes available in the sklearn.impute module.
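
Before the project-specific examples, here is a minimal sketch of the API they all build on (assuming scikit-learn >= 0.20, where SimpleImputer replaced the older sklearn.preprocessing.Imputer):

import numpy as np
from sklearn.impute import SimpleImputer

X = [[1.0, 2.0], [np.nan, 3.0], [7.0, 6.0]]
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
print(imputer.fit_transform(X))
# [[1. 2.]
#  [4. 3.]
#  [7. 6.]]  <- the NaN is replaced by the column mean, (1 + 7) / 2 = 4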
Example #1
Source File: estimator.py    From ramp-workflow with BSD 3-Clause "New" or "Revised" License
def get_estimator():

    categorical_cols = ['Sex', 'Pclass', 'Embarked']
    numerical_cols = ['Age', 'SibSp', 'Parch', 'Fare']

    preprocessor = make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        (SimpleImputer(strategy='constant', fill_value=-1), numerical_cols),
    )

    pipeline = Pipeline([
        ('transformer', preprocessor),
        ('classifier', LogisticRegression()),
    ])

    return pipeline 
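
Note that the snippets on this page omit their import headers. For Example #1, the imports are presumably along these lines (inferred from the names used, not copied from the source file):

from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder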
Example #2
Source File: test_cml_ImputerConverter.py    From onnxmltools with MIT License
def test_imputer(self):
        try:
            model = Imputer(missing_values='NaN', strategy='mean', axis=0)
        except TypeError:
            model = Imputer(missing_values=np.nan, strategy='mean')
            model.axis = 0
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model.fit(data)
        from onnxmltools.convert.coreml.convert import convert
        import coremltools  # noqa
        try:
            model_coreml = coremltools.converters.sklearn.convert(model)
        except ValueError as e:
            if 'not supported' in str(e):
                # Python 2.7 + scikit-learn 0.22
                return
            # re-raise anything other than the known "not supported" case, so
            # unrelated conversion failures are not silently swallowed
            raise
        model_onnx = convert(model_coreml.get_spec())
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(np.array(data, dtype=np.float32),
                            model, model_onnx, basename="CmlImputerMeanFloat32") 
Example #3
Source File: test_sklearn_imputer_converter.py    From sklearn-onnx with MIT License
def test_simple_imputer_float_inputs(self):
        # fill_value is ignored unless strategy="constant"; only the column
        # means are used here
        model = SimpleImputer(strategy="mean", fill_value="nan")
        data = [[1, 2], [np.nan, 3], [7, 6]]
        model.fit(data)

        model_onnx = convert_sklearn(
            model,
            "scikit-learn simple imputer",
            [("input", FloatTensorType([None, 2]))],
            target_opset=TARGET_OPSET)
        self.assertTrue(model_onnx.graph.node is not None)

        # should contain only one node
        self.assertEqual(len(model_onnx.graph.node), 1)

        # last node should contain the Imputer
        outputs = model_onnx.graph.output
        self.assertEqual(len(outputs), 1)
        self.assertEqual(
            outputs[0].type.tensor_type.shape.dim[-1].dim_value, 2)
        dump_data_and_model(
            np.array(data, dtype=np.float32),
            model, model_onnx,
            basename="SklearnSimpleImputerMeanFloat32") 
Example #4
Source File: base.py    From tpot with GNU Lesser General Public License v3.0
def _impute_values(self, features):
        """Impute missing values in a feature set.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            A feature matrix

        Returns
        -------
        array-like {n_samples, n_features}
        """
        if self.verbosity > 1:
            print('Imputing missing values in feature set')

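        # Fit the imputer only once, on the first feature set seen; later
        # calls reuse the medians learned then via transform()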
        if self._fitted_imputer is None:
            self._fitted_imputer = SimpleImputer(strategy="median")
            self._fitted_imputer.fit(features)

        return self._fitted_imputer.transform(features) 
Example #5
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_simple_imputation_add_indicator_sparse_matrix(arr_type):
    X_sparse = arr_type([
        [np.nan, 1, 5],
        [2, np.nan, 1],
        [6, 3, np.nan],
        [1, 2, 9]
    ])
    X_true = np.array([
        [3., 1., 5., 1., 0., 0.],
        [2., 2., 1., 0., 1., 0.],
        [6., 3., 5., 0., 0., 1.],
        [1., 2., 9., 0., 0., 0.],
    ])

    imputer = SimpleImputer(missing_values=np.nan, add_indicator=True)
    X_trans = imputer.fit_transform(X_sparse)

    assert sparse.issparse(X_trans)
    assert X_trans.shape == X_true.shape
    assert_allclose(X_trans.toarray(), X_true) 
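
The three right-hand columns of X_true above are the missing-indicator block: with add_indicator=True, SimpleImputer appends one binary column per feature that contained missing values during fit. A dense minimal sketch of the same behaviour:

import numpy as np
from sklearn.impute import SimpleImputer

X = np.array([[np.nan, 1.0], [2.0, 3.0]])
print(SimpleImputer(add_indicator=True).fit_transform(X))
# [[2. 1. 1.]
#  [2. 3. 0.]]  <- only the first feature had a missing value, so one indicator column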
Example #6
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_pipeline_grid_search():
    # Test imputation within a pipeline + gridsearch.
    X = sparse_random_matrix(100, 100, density=0.10)
    missing_values = X.data[0]

    pipeline = Pipeline([('imputer',
                          SimpleImputer(missing_values=missing_values)),
                         ('tree',
                          tree.DecisionTreeRegressor(random_state=0))])

    parameters = {
        'imputer__strategy': ["mean", "median", "most_frequent"]
    }

    Y = sparse_random_matrix(100, 1, density=0.10).toarray()
    gs = GridSearchCV(pipeline, parameters)
    gs.fit(X, Y) 
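
Because the imputer is a named pipeline step, its strategy becomes an ordinary hyperparameter. A sketch of reading the winning strategy back from the fitted search above (using the gs name defined there):

best_strategy = gs.best_params_['imputer__strategy']
print(best_strategy, gs.best_score_)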
Example #7
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_constant_pandas(dtype):
    # Test imputation using the constant strategy on pandas df
    pd = pytest.importorskip("pandas")

    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                    ",i,x,\n"
                    "a,,y,\n"
                    "a,j,,\n"
                    "b,j,x,")

    df = pd.read_csv(f, dtype=dtype)

    X_true = np.array([
        ["missing_value", "i", "x", "missing_value"],
        ["a", "missing_value", "y", "missing_value"],
        ["a", "j", "missing_value", "missing_value"],
        ["b", "j", "x", "missing_value"]
    ], dtype=object)

    imputer = SimpleImputer(strategy="constant")
    X_trans = imputer.fit_transform(df)

    assert_array_equal(X_trans, X_true) 
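
The "missing_value" entries in X_true are not arbitrary: when strategy="constant" is used without an explicit fill_value, SimpleImputer defaults to the string "missing_value" for string/object data and to 0 for numeric data. A minimal sketch of both defaults:

import numpy as np
from sklearn.impute import SimpleImputer

X_num = np.array([[np.nan, 1.0], [2.0, np.nan]])
print(SimpleImputer(strategy="constant").fit_transform(X_num))
# [[0. 1.]
#  [2. 0.]]

X_obj = np.array([["a", np.nan], ["b", "c"]], dtype=object)
print(SimpleImputer(strategy="constant").fit_transform(X_obj))
# [['a' 'missing_value']
#  ['b' 'c']]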
Example #8
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_constant_object(marker):
    # Test imputation using the constant strategy on objects
    X = np.array([
        [marker, "a", "b", marker],
        ["c", marker, "d", marker],
        ["e", "f", marker, marker],
        ["g", "h", "i", marker]
    ], dtype=object)

    X_true = np.array([
        ["missing", "a", "b", "missing"],
        ["c", "missing", "d", "missing"],
        ["e", "f", "missing", "missing"],
        ["g", "h", "i", "missing"]
    ], dtype=object)

    imputer = SimpleImputer(missing_values=marker, strategy="constant",
                            fill_value="missing")
    X_trans = imputer.fit_transform(X)

    assert_array_equal(X_trans, X_true) 
Example #9
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_constant_float(array_constructor):
    # Test imputation using the constant strategy on floats
    X = np.array([
        [np.nan, 1.1, 0, np.nan],
        [1.2, np.nan, 1.3, np.nan],
        [0, 0, np.nan, np.nan],
        [1.4, 1.5, 0, np.nan]
    ])

    X_true = np.array([
        [-1, 1.1, 0, -1],
        [1.2, -1, 1.3, -1],
        [0, 0, -1, -1],
        [1.4, 1.5, 0, -1]
    ])

    X = array_constructor(X)

    X_true = array_constructor(X_true)

    imputer = SimpleImputer(strategy="constant", fill_value=-1)
    X_trans = imputer.fit_transform(X)

    assert_allclose_dense_sparse(X_trans, X_true) 
Example #10
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_most_frequent_pandas(dtype):
    # Test imputation using the most frequent strategy on pandas df
    pd = pytest.importorskip("pandas")

    f = io.StringIO("Cat1,Cat2,Cat3,Cat4\n"
                    ",i,x,\n"
                    "a,,y,\n"
                    "a,j,,\n"
                    "b,j,x,")

    df = pd.read_csv(f, dtype=dtype)

    X_true = np.array([
        ["a", "i", "x"],
        ["a", "j", "y"],
        ["a", "j", "x"],
        ["b", "j", "x"]
    ], dtype=object)

    imputer = SimpleImputer(strategy="most_frequent")
    X_trans = imputer.fit_transform(df)

    assert_array_equal(X_trans, X_true) 
Example #11
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_most_frequent_objects(marker):
    # Test imputation using the most-frequent strategy.
    X = np.array([
        [marker, marker, "a", "f"],
        [marker, "c", marker, "d"],
        [marker, "b", "d", marker],
        [marker, "c", "d", "h"],
    ], dtype=object)

    X_true = np.array([
        ["c", "a", "f"],
        ["c", "d", "d"],
        ["b", "d", "d"],
        ["c", "d", "h"],
    ], dtype=object)

    imputer = SimpleImputer(missing_values=marker,
                            strategy="most_frequent")
    X_trans = imputer.fit(X).transform(X)

    assert_array_equal(X_trans, X_true) 
Example #12
Source File: test_impute.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_imputation_most_frequent():
    # Test imputation using the most-frequent strategy.
    X = np.array([
        [-1, -1, 0, 5],
        [-1, 2, -1, 3],
        [-1, 1, 3, -1],
        [-1, 2, 3, 7],
    ])

    X_true = np.array([
        [2, 0, 5],
        [2, 3, 3],
        [1, 3, 3],
        [2, 3, 7],
    ])

    # scipy.stats.mode, used in SimpleImputer, doesn't return the first most
    # frequent value as promised in its docs, but the lowest of the most
    # frequent values. If this test starts failing after a scipy update,
    # SimpleImputer will need to be updated to stay consistent with the new
    # (correct) behaviour
    _check_statistics(X, X_true, "most_frequent", [np.nan, 2, 3, 3], -1) 
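
A quick demonstration of that tie-breaking rule (a sketch; the exact behaviour depends on the scipy version, as the comment above warns): in a column where every value occurs once, the lowest value wins.

import numpy as np
from sklearn.impute import SimpleImputer

X = np.array([[np.nan], [7.0], [5.0], [3.0]])
print(SimpleImputer(strategy="most_frequent").fit_transform(X))
# the NaN is replaced by 3.0, the lowest of the equally frequent candidates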
Example #13
Source File: survey_dataset_reader.py    From cs-ranking with Apache License 2.0
def __load_dataset__(self):
        df = pd.io.stata.read_stata(self.train_file)
        orderings = []
        features = []
        for row in df.itertuples():
            orderings.append(row[4:8])
            context_feature = [float(i) if i != "." else np.nan for i in row[13:33]]
            features.append(context_feature)
        X = np.array(features)
        X = SimpleImputer().fit_transform(X)
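        # Log-transform each feature column (log(x + 1)), then transpose back
        # to shape (n_samples, n_features)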
        X = np.array([np.log(np.array(X[:, i]) + 1) for i in range(len(features[0]))])
        X = np.array(X.T)
        self.X = StandardScaler().fit_transform(X)
        orderings = np.array(orderings) - 1
        self.Y = ranking_ordering_conversion(orderings)
        self.__check_dataset_validity__() 
Example #14
Source File: imputation.py    From Auto-PyTorch with Apache License 2.0
def fit(self, hyperparameter_config, X, train_indices, dataset_info):
        hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)

        if dataset_info.is_sparse:
            return {'imputation_preprocessor': None, 'all_nan_columns': None}

        # delete all nan columns
        all_nan = np.all(np.isnan(X), axis=0)
        X = X[:, ~all_nan]
        dataset_info.categorical_features = [dataset_info.categorical_features[i] for i, is_nan in enumerate(all_nan) if not is_nan]

        strategy = hyperparameter_config['strategy']
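        # Use a sentinel one larger than any observed value as the "missing"
        # category for the constant-strategy categorical imputer below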
        fill_value = int(np.nanmax(X)) + 1 if not dataset_info.is_sparse else 0
        numerical_imputer = SimpleImputer(strategy=strategy, copy=False)
        categorical_imputer = SimpleImputer(strategy='constant', copy=False, fill_value=fill_value)
        transformer = ColumnTransformer(
            transformers=[('numerical_imputer', numerical_imputer, [i for i, c in enumerate(dataset_info.categorical_features) if not c]),
                          ('categorical_imputer', categorical_imputer,  [i for i, c in enumerate(dataset_info.categorical_features) if c])])
        transformer.fit(X[train_indices])
        X = transformer.transform(X)
        
        dataset_info.categorical_features = sorted(dataset_info.categorical_features)
        return {'X': X, 'imputation_preprocessor': transformer, 'dataset_info': dataset_info, 'all_nan_columns': all_nan} 
Example #15
Source File: lr_model.py    From autogluon with Apache License 2.0
def preprocess_train(self, X, feature_types, vect_max_features):
        transformer_list = []
        if len(feature_types['language']) > 0:
            pipeline = Pipeline(steps=[
                ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
                ("vectorizer",
                 TfidfVectorizer(ngram_range=self.params['proc.ngram_range'], sublinear_tf=True, max_features=vect_max_features, tokenizer=self.tokenize))
            ])
            transformer_list.append(('vect', pipeline))
        if len(feature_types['onehot']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
            ])
            transformer_list.append(('cats', pipeline))
        if len(feature_types['continuous']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('scaler', StandardScaler())
            ])
            transformer_list.append(('cont', pipeline))
        if len(feature_types['skewed']) > 0:
            pipeline = Pipeline(steps=[
                ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
                ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
                ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
            ])
            transformer_list.append(('skew', pipeline))
        self.pipeline = FeatureUnion(transformer_list=transformer_list)
        self.pipeline.fit(X) 
Example #16
Source File: test_model_selection_sklearn.py    From dask-ml with BSD 3-Clause "New" or "Revised" License
def test_grid_search_allows_nans():
    # Test dcv.GridSearchCV with Imputer
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan
    y = [0, 0, 1, 1, 1]

    imputer = SimpleImputer(strategy="mean", missing_values=np.nan)
    p = Pipeline([("imputer", imputer), ("classifier", MockClassifier())])
    dcv.GridSearchCV(p, {"classifier__foo_param": [1, 2, 3]}, cv=2).fit(X, y) 
Example #17
Source File: test_feat_mappers.py    From interpret-community with MIT License
def _get_nested_pipelines_and_data(self, last_transformer=None):
        # returns a pipeline that can be used to test nested pipelines. When last_transformer is not None, it is
        # added as the last transformer in pipeline_1
        steps = [("a", SimpleImputer()), ("b", OneHotEncoder())]
        if last_transformer:
            steps.append(("c", last_transformer))
        pipeline_1 = Pipeline(steps)
        pipeline = Pipeline([("a", SimpleImputer()), ("b", pipeline_1)])
        x = np.zeros((5, 2))
        x[0, 0] = 1
        x[0, 1] = 1
        x[1, 0] = 2

        return pipeline.fit(x), x 
Example #18
Source File: test_data_mapper.py    From interpret-community with MIT License
def test_pipeline_transform_list(self):
        pipeline = Pipeline([("imputer", SimpleImputer()), ("onehotencoder", OneHotEncoder())])
        x = np.ones((3, 2))
        pipeline.fit(x)
        data_mapper = DataMapper([([0, 1], pipeline)])
        result = data_mapper.transform(x)
        assert result.shape == (3, 2) 
Example #19
Source File: test_feat_mappers.py    From interpret-community with MIT License
def test_identity_mapper(self):
        x = np.zeros((5, 2))
        imputer = SimpleImputer()
        imputer.fit(x)
        mapper = IdentityMapper(imputer)
        mapper.transform(x)

        feature_map = np.eye(2)
        assert np.all(mapper.feature_map == feature_map) 
Example #20
Source File: test_feat_mappers.py    From interpret-community with MIT License
def test_get_feature_mapper_tuple_for_pipeline(self):
        pipeline = Pipeline([("a", SimpleImputer()), ("b", SimpleImputer()), ("c", OneHotEncoder())])
        x = np.zeros((5, 2))
        x[0, 0] = 1
        x[0, 1] = 1
        pipeline.fit(x)
        feature_mapper = get_feature_mapper_for_pipeline(pipeline)
        feature_mapper.transform(x)

        feature_map = np.zeros((2, 4))
        feature_map[0, :2] = 1
        feature_map[1, 2:] = 1
        assert np.all(feature_mapper.feature_map == feature_map) 
Example #21
Source File: test_data_mapper.py    From interpret-community with MIT License
def test_pipeline_transform_column_transformer(self):
        pipeline = Pipeline([("imputer", SimpleImputer()), ("onehotencoder", OneHotEncoder())])
        x = np.ones((3, 2))
        column_transformer = ColumnTransformer([
            ("column", pipeline, [0, 1])
        ])
        column_transformer.fit(x)
        data_mapper = DataMapper(column_transformer)
        result = data_mapper.transform(x)
        assert result.shape == (3, 2) 
Example #22
Source File: test_ceteris_paribus.py    From DALEX with GNU General Public License v3.0
def setUp(self):
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        numeric_features = ['age', 'fare', 'sibsp', 'parch']
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())])

        categorical_features = ['gender', 'class', 'embarked']
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)])

        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', MLPClassifier(hidden_layer_sizes=(50, 100, 50),
                                                           max_iter=400, random_state=0))])

        clf.fit(self.X, self.y)

        self.exp = dx.Explainer(clf, self.X, self.y, verbose=False) 
Example #23
Source File: test_aggregated_profiles.py    From DALEX with GNU General Public License v3.0
def setUp(self):
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        numeric_features = ['age', 'fare', 'sibsp', 'parch']
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())])

        categorical_features = ['gender', 'class', 'embarked']
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)])

        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', MLPClassifier(hidden_layer_sizes=(20, 20),
                                                           max_iter=400, random_state=0))])
        clf2 = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier', MLPClassifier(hidden_layer_sizes=(50, 100, 50),
                                                            max_iter=400, random_state=0))])

        clf.fit(self.X, self.y)
        clf2.fit(self.X, self.y)

        self.exp = dx.Explainer(clf, self.X, self.y, label="model1", verbose=False)
        self.exp2 = dx.Explainer(clf2, self.X, self.y, verbose=False)
        self.exp3 = dx.Explainer(clf, self.X, self.y, label="model3", verbose=False) 
Example #24
Source File: test_model_performance.py    From DALEX with GNU General Public License v3.0
def setUp(self):
        data = dx.datasets.load_titanic()
        data.loc[:, 'survived'] = LabelEncoder().fit_transform(data.survived)

        self.X = data.drop(columns='survived')
        self.y = data.survived

        numeric_features = ['age', 'fare', 'sibsp', 'parch']
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())])

        categorical_features = ['gender', 'class', 'embarked']
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)])

        clf = Pipeline(steps=[('preprocessor', preprocessor),
                              ('classifier', MLPClassifier(hidden_layer_sizes=(50, 100, 50),
                                                           max_iter=400, random_state=0))])

        clf.fit(self.X, self.y)

        self.exp = dx.Explainer(clf, self.X, self.y, verbose=False)
        self.exp2 = dx.Explainer(clf, self.X, self.y, label="model2", verbose=False) 
Example #25
Source File: data_cleaner.py    From MAST-ML with MIT License
def imputation(df, strategy, cols_to_leave_out=None):
    """
    Method that imputes values to the missing places based on the median, mean, etc. of the data in the column

    Args:
        df: (dataframe), pandas dataframe containing data
        strategy: (str), method of imputation, e.g. median, mean, etc.
        cols_to_leave_out: (list), list of column indices to not include in imputation

    Returns:
        df: (dataframe): dataframe with NaN or missing values resolved via imputation

    """
    col_names = df.columns.tolist()
    # the old Imputer(missing_values='NaN', ..., axis=0) API was removed in
    # scikit-learn 0.22; SimpleImputer with missing_values=np.nan is its replacement
    if cols_to_leave_out is None:
        df_imputed = pd.DataFrame(SimpleImputer(missing_values=np.nan, strategy=strategy).fit_transform(df))
    else:
        df_include = df.drop(cols_to_leave_out, axis=1)
        df_hold_out = df.drop([c for c in df.columns if c not in cols_to_leave_out], axis=1)
        df_imputed = pd.DataFrame(SimpleImputer(missing_values=np.nan, strategy=strategy).fit_transform(df_include), columns=df_include.columns)
    # Need to join the imputed dataframe with the columns containing strings that were held out
    if cols_to_leave_out is None:
        df = df_imputed
    else:
        df = pd.concat([df_hold_out, df_imputed], axis=1)
        col_names = df.columns.tolist()
    return df 
Example #26
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def test_mapper(self):
		domain = ContinuousDomain()
		df = DataFrame([{"X1" : 2.0, "X2" : 2, "y" : 2.0}, {"X1" : 1.0, "X2" : 0.5}, {"X1" : 2}, {"X2" : 2}, {"X1" : 2.0, "y" : 1}, {"X1" : 3.0, "X2" : 3.5}])
		mapper = DataFrameMapper([
			(["X1", "X2"], [domain, SimpleImputer(), StandardScaler()]),
			("y", None)
		])
		mapper.fit_transform(df)
		self.assertEqual({"totalFreq" : [6, 6], "missingFreq" : [1, 2], "invalidFreq" : [0, 0]}, _array_to_list(domain.counts_))
		self.assertEqual({"minimum" : [1.0, 0.5], "maximum" : [3.0, 3.5], "mean" : [2.0, 2.0]}, _array_to_list(dict((k, domain.numeric_info_[k]) for k in ["minimum", "maximum", "mean"])))
		self.assertEqual([1.0, 0.5], domain.data_min_.tolist())
		self.assertEqual([3.0, 3.5], domain.data_max_.tolist()) 
Example #27
Source File: classifier.py    From ramp-workflow with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        self.clf = Pipeline([
            ('imputer', SimpleImputer(strategy='median')),
            ('classifier', LogisticRegression(C=1., solver='lbfgs'))
        ]) 
Example #28
Source File: __init__.py    From sklearn2pmml with GNU Affero General Public License v3.0
def test_sequence_transform(self):
		X = DataFrame([[None], [1], [None]], columns = ["a"])
		mapper = DataFrameMapper([
			(["a"], [ExpressionTransformer("0 if pandas.isnull(X[0]) else X[0]"), SimpleImputer(missing_values = 0)])
		])
		Xt = mapper.fit_transform(X)
		self.assertEqual([[1], [1], [1]], Xt.tolist()) 
Example #29
Source File: movie_maoyan_knn_font.py    From akshare with MIT License
def process_data(data):
        imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
        return pd.DataFrame(imputer.fit_transform(pd.DataFrame(data)))
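
fit_transform returns a plain ndarray, so the DataFrame built above loses the original column labels and index. A sketch of one way to keep them (the variable names here are illustrative):

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

df = pd.DataFrame({"a": [1.0, np.nan], "b": [3.0, 4.0]})
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
df_filled = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)

Newer scikit-learn releases (1.2+) can also return DataFrames directly via imputer.set_output(transform="pandas").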