Python sklearn.neighbors.KNeighborsRegressor() Examples

The following are 30 code examples of sklearn.neighbors.KNeighborsRegressor(), collected from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the module sklearn.neighbors, or try the search function.
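Before the project examples, here is a minimal, self-contained sketch of the basic fit/predict workflow; the data and parameter choices are illustrative and not taken from any of the projects below.

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

# Synthetic 1-D regression problem: a noisy sine curve.
rng = np.random.RandomState(0)
X = np.sort(5 * rng.rand(40, 1), axis=0)
y = np.sin(X).ravel() + 0.1 * rng.normal(size=40)

# Fit a 5-nearest-neighbors regressor and predict on new query points.
knn = KNeighborsRegressor(n_neighbors=5, weights='uniform')
knn.fit(X, y)
X_query = np.linspace(0, 5, 10)[:, np.newaxis]
print(knn.predict(X_query))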
Example #1
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test) 
Example #2
Source File: friedman_scores.py    From mlens with MIT License
def build_ensemble(**kwargs):
    """Generate ensemble."""

    ens = SuperLearner(**kwargs)
    prep = {'Standard Scaling': [StandardScaler()],
            'Min Max Scaling': [MinMaxScaler()],
            'No Preprocessing': []}

    est = {'Standard Scaling':
               [ElasticNet(), Lasso(), KNeighborsRegressor()],
           'Min Max Scaling':
               [SVR()],
           'No Preprocessing':
               [RandomForestRegressor(random_state=SEED),
                GradientBoostingRegressor()]}

    ens.add(est, prep)

    ens.add(GradientBoostingRegressor(), meta=True)

    return ens 
Example #3
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_KNeighborsRegressor_multioutput_uniform_weight():
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    for algorithm, weights in product(ALGORITHMS, [None, 'uniform']):
        knn = neighbors.KNeighborsRegressor(weights=weights,
                                            algorithm=algorithm)
        knn.fit(X_train, y_train)

        neigh_idx = knn.kneighbors(X_test, return_distance=False)
        y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                               for idx in neigh_idx])

        y_pred = knn.predict(X_test)

        assert_equal(y_pred.shape, y_test.shape)
        assert_equal(y_pred_idx.shape, y_test.shape)
        assert_array_almost_equal(y_pred, y_pred_idx) 
Example #4
Source File: RegressionKNN.py    From AirTicketPredicting with MIT License
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,100)
                             }
                            ]


        # 'neg_mean_squared_error' replaces the deprecated scorer name
        # 'mean_squared_error'; cv_results_ replaces grid_scores_.
        reg = GridSearchCV(neighbors.KNeighborsRegressor(), tuned_parameters,
                           cv=5, scoring='neg_mean_squared_error')
        reg.fit(self.X_train, self.y_train)

        print("Best parameters set found on development set:\n")
        print(reg.best_params_)

        print("Grid scores on development set:\n")
        means = reg.cv_results_['mean_test_score']
        stds = reg.cv_results_['std_test_score']
        for mean, std, params in zip(means, stds, reg.cv_results_['params']):
            print("%0.3f (+/-%0.03f) for %r\n" % (mean, std * 2, params))

        print(reg.scorer_)

        print("MSE for test data set:")
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print(mean_squared_error(y_true, y_pred)) 
Example #5
Source File: dummy_outcome_refuter.py    From dowhy with MIT License
def _get_regressor_object(self, action, **func_args):
        """
        Return a sklearn estimator object based on the estimator and corresponding parameters

        - 'action': str
        The sklearn estimator used.
        - 'func_args': variable length keyworded argument
        The parameters passed to the sklearn estimator.
        """
        if action == "linear_regression":
            return LinearRegression(**func_args)
        elif action == "knn":
            return KNeighborsRegressor(**func_args)
        elif action == "svm":
            return SVR(**func_args)
        elif action == "random_forest":
            return RandomForestRegressor(**func_args)
        elif action == "neural_network":
            return MLPRegressor(**func_args)
        else:
            raise ValueError("The function: {} is not supported by dowhy at the moment.".format(action)) 
Example #6
Source File: plot_kneighbors_regularization.py    From scipy_2015_sklearn_tutorial with Creative Commons Zero v1.0 Universal
def plot_kneighbors_regularization():
    rnd = np.random.RandomState(42)
    x = np.linspace(-3, 3, 100)
    y_no_noise = np.sin(4 * x) + x
    y = y_no_noise + rnd.normal(size=len(x))
    X = x[:, np.newaxis]
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    x_test = np.linspace(-3, 3, 1000)

    for n_neighbors, ax in zip([2, 5, 20], axes.ravel()):
        kneighbor_regression = KNeighborsRegressor(n_neighbors=n_neighbors)
        kneighbor_regression.fit(X, y)
        ax.plot(x, y_no_noise, label="true function")
        ax.plot(x, y, "o", label="data")
        ax.plot(x_test, kneighbor_regression.predict(x_test[:, np.newaxis]),
                label="prediction")
        ax.legend()
        ax.set_title("n_neighbors = %d" % n_neighbors) 
Example #7
Source File: test_neighbors.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.neighbors.NearestNeighbors,
                      neighbors.NearestNeighbors)
        self.assertIs(df.neighbors.KNeighborsClassifier,
                      neighbors.KNeighborsClassifier)
        self.assertIs(df.neighbors.RadiusNeighborsClassifier,
                      neighbors.RadiusNeighborsClassifier)
        self.assertIs(df.neighbors.KNeighborsRegressor,
                      neighbors.KNeighborsRegressor)
        self.assertIs(df.neighbors.RadiusNeighborsRegressor,
                      neighbors.RadiusNeighborsRegressor)
        self.assertIs(df.neighbors.NearestCentroid, neighbors.NearestCentroid)
        self.assertIs(df.neighbors.BallTree, neighbors.BallTree)
        self.assertIs(df.neighbors.KDTree, neighbors.KDTree)
        self.assertIs(df.neighbors.DistanceMetric, neighbors.DistanceMetric)
        self.assertIs(df.neighbors.KernelDensity, neighbors.KernelDensity) 
Example #8
Source File: test_custom_transformer.py    From sklearn-onnx with MIT License
def __init__(self,
                 transformer=None,
                 estimator=None,
                 normalize=True,
                 keep_tsne_outputs=False,
                 **kwargs):
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = KNeighborsRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.keep_tsne_outputs = keep_tsne_outputs
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(
                "transformer {} does not have a 'fit_transform' "
                "method.".format(type(transformer)))
        if not hasattr(estimator, "predict"):
            raise AttributeError("estimator {} does not have a 'predict' "
                                 "method.".format(type(estimator)))
        self.normalize = normalize
        if kwargs:
            self.set_params(**kwargs) 
Example #9
Source File: testScoreWithAdapaSklearn.py    From nyoka with Apache License 2.0
def test_33_knn_regressor(self):
        print("\ntest 33 (knn regressor without preprocessing)\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_regression()

        model = KNeighborsRegressor()
        pipeline_obj = Pipeline([
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test33sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, _ = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True) 
Example #10
Source File: ptdf_ts.py    From GridCal with GNU General Public License v3.0
def knn_interp(X, Y, perc):

    k_split = int(X.shape[0] * perc)
    X_train = X[:k_split]
    Y_train = Y[:k_split]
    X_test = X[k_split:]
    Y_test = Y[k_split:]

    n_neighbors = 5
    model = neighbors.KNeighborsRegressor(n_neighbors)

    print('Fitting...')
    model.fit(X_train, Y_train)

    print('Predicting...')
    Y_predict = model.predict(X_test)

    print('Scoring...')
    score = model.score(X_test, Y_test)

    print('Score:', score)

    return Y_predict 
Example #11
Source File: flow_data.py    From floris with Apache License 2.0
def get_points_from_flow_data(self, x_points, y_points, z_points):
        """
        Return the u-values of a set of points from within a FlowData object.
        Uses a simple nearest-neighbor regressor for internal interpolation.

        Args:
            x_points (np.array): Array of x-locations of points.
            y_points (np.array): Array of y-locations of points.
            z_points (np.array): Array of z-locations of points.

        Returns:
            np.array: Array of u-velocity at specified points.
        """
        # print(x_points,y_points,z_points)
        X = np.column_stack([self.x, self.y, self.z])
        n_neighbors = 1
        knn = neighbors.KNeighborsRegressor(n_neighbors)
        knn.fit(X, self.u)

        # Predict new points
        T = np.column_stack([x_points, y_points, z_points])
        return knn.predict(T) 
Example #12
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95) 
Example #13
Source File: test_bagging.py    From twitter-stock-recommendation with MIT License
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test) 
Example #14
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_kneighbors_regressor_multioutput(n_samples=40,
                                          n_features=5,
                                          n_test_pts=10,
                                          n_neighbors=3,
                                          random_state=0):
    # Test k-neighbors in multi-output regression
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()
    y = np.vstack([y, y]).T

    y_target = y[:n_test_pts]

    weights = ['uniform', 'distance', _weight_func]
    for algorithm, weights in product(ALGORITHMS, weights):
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            weights=weights,
                                            algorithm=algorithm)
        knn.fit(X, y)
        epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
        y_pred = knn.predict(X[:n_test_pts] + epsilon)
        assert_equal(y_pred.shape, y_target.shape)

        assert np.all(np.abs(y_pred - y_target) < 0.3) 
Example #15
Source File: test_neighbors.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_kneighbors_regressor(n_samples=40,
                              n_features=5,
                              n_test_pts=10,
                              n_neighbors=3,
                              random_state=0):
    # Test k-neighbors regression
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]

    weight_func = _weight_func

    for algorithm in ALGORITHMS:
        for weights in ['uniform', 'distance', weight_func]:
            knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                                weights=weights,
                                                algorithm=algorithm)
            knn.fit(X, y)
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = knn.predict(X[:n_test_pts] + epsilon)
            assert np.all(abs(y_pred - y_target) < 0.3) 
Example #16
Source File: test_bagging.py    From twitter-stock-recommendation with MIT License
def test_single_estimator():
    # Check singleton ensembles.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf1 = BaggingRegressor(base_estimator=KNeighborsRegressor(),
                            n_estimators=1,
                            bootstrap=False,
                            bootstrap_features=False,
                            random_state=rng).fit(X_train, y_train)

    clf2 = KNeighborsRegressor().fit(X_train, y_train)

    assert_array_equal(clf1.predict(X_test), clf2.predict(X_test)) 
Example #17
Source File: test_bagging.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_single_estimator():
    # Check singleton ensembles.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf1 = BaggingRegressor(base_estimator=KNeighborsRegressor(),
                            n_estimators=1,
                            bootstrap=False,
                            bootstrap_features=False,
                            random_state=rng).fit(X_train, y_train)

    clf2 = KNeighborsRegressor().fit(X_train, y_train)

    assert_array_almost_equal(clf1.predict(X_test), clf2.predict(X_test)) 
Example #18
Source File: test_neighbors.py    From twitter-stock-recommendation with MIT License
def test_KNeighborsRegressor_multioutput_uniform_weight():
    # Test k-neighbors in multi-output regression with uniform weight
    rng = check_random_state(0)
    n_features = 5
    n_samples = 40
    n_output = 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    for algorithm, weights in product(ALGORITHMS, [None, 'uniform']):
        knn = neighbors.KNeighborsRegressor(weights=weights,
                                            algorithm=algorithm)
        knn.fit(X_train, y_train)

        neigh_idx = knn.kneighbors(X_test, return_distance=False)
        y_pred_idx = np.array([np.mean(y_train[idx], axis=0)
                               for idx in neigh_idx])

        y_pred = knn.predict(X_test)

        assert_equal(y_pred.shape, y_test.shape)
        assert_equal(y_pred_idx.shape, y_test.shape)
        assert_array_almost_equal(y_pred, y_pred_idx) 
Example #19
Source File: test_neighbors.py    From twitter-stock-recommendation with MIT License
def test_kneighbors_regressor_multioutput(n_samples=40,
                                          n_features=5,
                                          n_test_pts=10,
                                          n_neighbors=3,
                                          random_state=0):
    # Test k-neighbors in multi-output regression
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()
    y = np.vstack([y, y]).T

    y_target = y[:n_test_pts]

    weights = ['uniform', 'distance', _weight_func]
    for algorithm, weights in product(ALGORITHMS, weights):
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            weights=weights,
                                            algorithm=algorithm)
        knn.fit(X, y)
        epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
        y_pred = knn.predict(X[:n_test_pts] + epsilon)
        assert_equal(y_pred.shape, y_target.shape)

        assert_true(np.all(np.abs(y_pred - y_target) < 0.3)) 
Example #20
Source File: test_neighbors.py    From twitter-stock-recommendation with MIT License
def test_kneighbors_regressor_sparse(n_samples=40,
                                     n_features=5,
                                     n_test_pts=10,
                                     n_neighbors=5,
                                     random_state=0):
    # Test k-neighbors regression on sparse matrices
    # Like the above, but with various types of sparse matrices
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = ((X ** 2).sum(axis=1) < .25).astype(int)

    for sparsemat in SPARSE_TYPES:
        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                            algorithm='auto')
        knn.fit(sparsemat(X), y)
        for sparsev in SPARSE_OR_DENSE:
            X2 = sparsev(X)
            assert_true(np.mean(knn.predict(X2).round() == y) > 0.95) 
Example #21
Source File: test_neighbors.py    From twitter-stock-recommendation with MIT License
def test_neighbors_iris():
    # Sanity checks on the iris dataset
    # Puts three points of each label in the plane and performs a
    # nearest neighbor query on points near the decision boundary.

    for algorithm in ALGORITHMS:
        clf = neighbors.KNeighborsClassifier(n_neighbors=1,
                                             algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_array_equal(clf.predict(iris.data), iris.target)

        clf.set_params(n_neighbors=9, algorithm=algorithm)
        clf.fit(iris.data, iris.target)
        assert_true(np.mean(clf.predict(iris.data) == iris.target) > 0.95)

        rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm)
        rgs.fit(iris.data, iris.target)
        assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target),
                       0.95) 
Example #22
Source File: convex_opt.py    From oboe with BSD 3-Clause "New" or "Revised" License
def fit(self, sizes, sizes_index, runtimes, runtimes_index):
        """Fit polynomial regression on pre-recorded runtimes on datasets."""
        # assert sizes.shape[0] == runtimes.shape[0], "Dataset sizes and runtimes must be recorded on same datasets."
        for i in set(runtimes_index).difference(set(sizes_index)):
            dataset = openml.datasets.get_dataset(int(i))
            data_numeric, data_labels, categorical, _ = dataset.get_data(target=dataset.default_target_attribute)
            if len(sizes) == 0:
                sizes = np.array([data_numeric.shape])
                sizes_index = np.array(i)
            else:
                sizes = np.concatenate((sizes, np.array([data_numeric.shape])))
                sizes_index = np.append(sizes_index, i)

        sizes_train = np.array([sizes[list(sizes_index).index(i), :] for i in runtimes_index])
        sizes_log = np.concatenate((sizes_train, np.log(sizes_train[:, 0]).reshape(-1, 1)), axis=1)
        sizes_train_poly = PolynomialFeatures(self.degree).fit_transform(sizes_log)

        # train independent regression model to predict each runtime of each model setting
        for i in range(self.n_models):
            runtime = runtimes[:, i]
            no_nan_indices = np.where(np.invert(np.isnan(runtime)))[0]
            runtime_no_nan = runtime[no_nan_indices]
            
            
            if self.model_name == 'LinearRegression':
                sizes_train_poly_no_nan = sizes_train_poly[no_nan_indices]
                self.models[i] = LinearRegression().fit(sizes_train_poly_no_nan, runtime_no_nan)
            elif self.model_name == 'KNeighborsRegressor':
                sizes_train_no_nan = sizes_train[no_nan_indices]
                def metric(a, b):
                    # Weighted squared Euclidean distance: the second size
                    # component (feature count) is weighted 100x.
                    coefficients = [1, 100]
                    return np.sum(np.multiply((a - b) ** 2, coefficients))

                def weights(distances):
                    # Returning the raw distances weights farther neighbors
                    # more heavily, unlike the built-in 'distance' option,
                    # which uses inverse distances.
                    return distances

                neigh = KNeighborsRegressor(n_neighbors=5, metric=metric, weights=weights)
                self.models[i] = neigh.fit(sizes_train_no_nan, runtime_no_nan)
#            print(self.models[i].coef_)
#            print(self.models[i].intercept_)
            # self.models[i] = Lasso().fit(sizes_train_poly, runtime) 
Example #23
Source File: convex_opt.py    From oboe with BSD 3-Clause "New" or "Revised" License
def predict(self, size):
        """Predict runtime of all model settings on a dataset of given size.
        
        Args:
            size(np.array): Size of the dataset to fit runtime onto.
        Returns:
            predictions (np.array): The predicted runtime.
        """
        if self.model_name == 'LinearRegression':
            size_test = np.append(size, np.log(size[0]))
            size_test_poly = PolynomialFeatures(self.degree).fit_transform([size_test])
            predictions = np.zeros(self.n_models)
            for i in range(self.n_models):
                predictions[i] = self.models[i].predict(size_test_poly)[0]
    
        elif self.model_name == 'KNeighborsRegressor':
            predictions = np.zeros(self.n_models)
            for i in range(self.n_models):
                predictions[i] = self.models[i].predict(np.array(size).reshape(1, -1))[0]
        
        return predictions 
Example #24
Source File: posterior_utils.py    From scVI with MIT License
def proximity_imputation(real_latent1, normed_gene_exp_1, real_latent2, k=4):
    knn = KNeighborsRegressor(k, weights="distance")
    y = knn.fit(real_latent1, normed_gene_exp_1).predict(real_latent2)
    return y 
Example #25
Source File: plot_custom_model.py    From sklearn-onnx with MIT License
def __init__(self, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False, **kwargs):
        """
        :param transformer: `TSNE` by default
        :param estimator: `KNeighborsRegressor` by default
        :param normalize: normalizes the outputs, centers and normalizes
            the output of the *t-SNE* and applies that same
            normalization to the prediction of the estimator
        :param keep_tsne_outputs: if True, the raw outputs of
            *TSNE* are stored in member *tsne_outputs_*
        :param kwargs: sent to :meth:`set_params <mlinsights.mlmodel.
            tsne_transformer.PredictableTSNE.set_params>`, see its
            documentation to understand how to specify parameters
        """
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = KNeighborsRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.keep_tsne_outputs = keep_tsne_outputs
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(
                "Transformer {} does not have a 'fit_transform' "
                "method.".format(type(transformer)))
        if not hasattr(estimator, "predict"):
            raise AttributeError(
                "Estimator {} does not have a 'predict' method.".format(
                    type(estimator)))
        self.normalize = normalize
        if kwargs:
            self.set_params(**kwargs) 
Example #26
Source File: RegressionKNN.py    From AirTicketPredicting with MIT License
def __init__(self, isTrain):
        super(RegressionKNN, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create the KNN regression object.
        # The first parameter is the number of neighbors, K.
        # 'uniform' assigns equal weight to each neighbor;
        # 'distance' assigns weights proportional to the inverse of the
        # distance from the query point.
        # The default metric is Minkowski with p=2, i.e. Euclidean distance.
        self.regr = neighbors.KNeighborsRegressor(86, weights='distance') 
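The comments above describe the two weighting schemes; the following minimal sketch (editorial, not part of AirTicketPredicting) makes the difference concrete on toy data.

import numpy as np
from sklearn import neighbors

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 1.0, 2.0, 3.0])

uniform = neighbors.KNeighborsRegressor(n_neighbors=2, weights='uniform').fit(X, y)
inverse = neighbors.KNeighborsRegressor(n_neighbors=2, weights='distance').fit(X, y)

# Query at x=0.8: the two nearest neighbors are x=1.0 (distance 0.2)
# and x=0.0 (distance 0.8).
print(uniform.predict([[0.8]]))  # plain average: (0 + 1) / 2 = 0.5
print(inverse.predict([[0.8]]))  # inverse-distance weighted: 0.8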
Example #27
Source File: test_ctregressor.py    From mvlearn with Apache License 2.0
def data():
    random_seed = 10
    N = 100
    D1 = 10
    D2 = 6
    N_test = 5
    random_data = []
    np.random.seed(random_seed)
    random_data.append(np.random.rand(N, D1))
    random_data.append(np.random.rand(N, D2))
    random_labels = np.random.rand(N)
    random_labels[:-10] = np.nan
    random_test = []
    random_test.append(np.random.rand(N_test, D1))
    random_test.append(np.random.rand(N_test, D2))
    knn1 = KNeighborsRegressor()
    knn2 = KNeighborsRegressor()
    reg_test = CTRegressor(
        estimator1=knn1, estimator2=knn2, random_state=random_seed)

    return {
        'N_test': N_test,
        'reg_test': reg_test,
        'random_data': random_data,
        'random_labels': random_labels,
        'random_test': random_test,
        'random_seed': random_seed} 
Example #28
Source File: test_ctregressor.py    From mvlearn with Apache License 2.0
def test_set_n_neighbors_as_one():
    X1 = [[0], [1], [2], [3], [4], [5], [6]]
    X2 = [[2], [3], [4], [6], [7], [8], [10]]
    y = [10, -200, 12, 13, -100, 15, 16]
    y_train = [10, np.nan, np.nan, 13, np.nan, 15, 16]
    truth = [10.75, 10.75, 12.25, 13.75, 13.75, 14.75, 15.5]
    ctr = CTRegressor(
        KNeighborsRegressor(n_neighbors=2),
        KNeighborsRegressor(n_neighbors=2),
        k_neighbors=2, random_state=42)
    ctr.fit([X1, X2], y_train)
    pred = ctr.predict([X1, X2])
    for i, j in zip(truth, pred):
        assert abs(i-j) < 0.00000001 
Example #29
Source File: ctregression.py    From mvlearn with Apache License 2.0
def __init__(
        self,
        estimator1=None,
        estimator2=None,
        k_neighbors=5,
        unlabeled_pool_size=50,
        num_iter=100,
        random_state=None
    ):

        # initialize a BaseCTEstimator object
        super().__init__(estimator1, estimator2, random_state)

        # If not given, initialize with the default KNeighborsRegressor
        if estimator1 is None:
            estimator1 = KNeighborsRegressor()
        if estimator2 is None:
            estimator2 = KNeighborsRegressor()

        # Initializing the other attributes
        self.estimator1_ = estimator1
        self.estimator2_ = estimator2
        self.k_neighbors_ = k_neighbors
        self.unlabeled_pool_size = unlabeled_pool_size
        self.num_iter = num_iter

        # Used in fit method while selecting a pool of unlabeled samples
        random.seed(random_state)

        self.n_views = 2
        self.class_name_ = "CTRegressor"

        # checks whether the parameters given is valid
        self._check_params() 
Example #30
Source File: ctregression.py    From mvlearn with Apache License 2.0 5 votes vote down vote up
def _check_params(self):
        r"""
        Checks that cotraining parameters are valid. Throws AttributeError
        if estimators are invalid. Throws ValueError if any other parameters
        are not valid. The checks performed are:
            - estimator1 and estimator2 are KNeighborsRegressor
            - k_neighbors_ is positive
            - unlabeled_pool_size is positive
            - num_iter is positive
        """

        # The estimator must be KNeighborsRegressor
        to_be_matched = "KNeighborsRegressor"

        # Taking the str of estimator object
        # returns the class name along with other parameters
        string1 = str(self.estimator1_)
        string2 = str(self.estimator2_)

        # slicing the list to get the name of the estimator
        string1 = string1[: len(to_be_matched)]
        string2 = string2[: len(to_be_matched)]

        if string1 != to_be_matched or string2 != to_be_matched:
            raise AttributeError(
                "Both estimators need to be KNeighborsRegressor")

        if self.k_neighbors_ <= 0:
            raise ValueError("k_neighbors must be positive")

        if self.unlabeled_pool_size <= 0:
            raise ValueError("unlabeled_pool_size must be positive")

        if self.num_iter <= 0:
            raise ValueError("number of iterations must be positive")