Python sklearn.neighbors.KernelDensity() Examples

The following code examples show how to use sklearn.neighbors.KernelDensity(). They are drawn from open source Python projects.
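For orientation before the project examples, here is a minimal, self-contained sketch of the basic API (not taken from any project below): KernelDensity is fit on a 2-D array of shape (n_samples, n_features), and score_samples() returns the log of the estimated probability density.

import numpy as np
from sklearn.neighbors import KernelDensity

# toy 1-D data; sklearn expects shape (n_samples, n_features)
X = np.random.RandomState(0).randn(100, 1)

kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
grid = np.linspace(-4, 4, 200)[:, np.newaxis]
log_density = kde.score_samples(grid)  # log of the estimated pdf
density = np.exp(log_density)          # back to a plain density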

Example 1
Project: Deep-SAD-PyTorch   Author: lukasruff   File: kde.py    MIT License
def __init__(self, hybrid=False, kernel='gaussian', n_jobs=-1, seed=None, **kwargs):
        """Init Kernel Density Estimation instance."""
        self.kernel = kernel
        self.n_jobs = n_jobs
        self.seed = seed

        self.model = KernelDensity(kernel=kernel, **kwargs)
        self.bandwidth = self.model.bandwidth

        self.hybrid = hybrid
        self.ae_net = None  # autoencoder network for the case of a hybrid model

        self.results = {
            'train_time': None,
            'test_time': None,
            'test_auc': None,
            'test_scores': None
        } 
Example 2
Project: adversarial-policies   Author: HumanCompatibleAI   File: fit_density.py    MIT License
def gen_exp_name(model_class, model_kwargs):
    """Generates experiment name from model class and parameters.

    :param model_class: (type) the class, one of GaussianMixture, PCAPreDensity or KernelDensity.
    :param model_kwargs: (dict) constructor arguments to the class.
    :return: A string succinctly encoding the class and parameters."""
    if model_class == GaussianMixture:
        n_components = model_kwargs.get("n_components", 1)
        covariance_type = model_kwargs.get("covariance_type", "full")
        return f"gmm_{n_components}_components_{covariance_type}"
    elif model_class == PCAPreDensity:
        if model_kwargs["density_class"] == KernelDensity:
            return "pca_kde"
        elif model_kwargs["density_class"] == GaussianMixture:
            return "pca_gmm"
        else:
            return "pca_unknown"
    elif model_class == KernelDensity:
        return "kde"
    else:
        return "default" 
Example 3
Project: Boost-HiC   Author: LeopoldC   File: HiCutils.py    MIT License
def kde_sklearn(reads, bandwidth = 1000, **kwargs):
	"""
	Kernel Density Estimation with Scikit-learn :
	Estimate the density from the reads distribution
	Entry :
		- reads, as the number of reads per bin
		- bandwidth : width of the gaussian kernel (can be modified)
	Output :
		- returns the density as a matrix for each point from 0 to max(reads)
	"""
	x_grid = np.linspace(0, np.max(reads), int(np.max(reads)) + 1)
	kde_skl = KernelDensity(kernel = 'gaussian', bandwidth=bandwidth, **kwargs)
	# KernelDensity expects a 2-D array of shape (n_samples, n_features)
	kde_skl.fit(np.asarray(reads).reshape(-1, 1))
	# score_samples() returns the log-likelihood of the samples
	log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])
	res = np.exp(log_pdf) / np.exp(log_pdf).sum()
	return res 
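A detail worth calling out, since it recurs throughout these examples: fit() and score_samples() both require 2-D arrays, so 1-D data must be reshaped first. A minimal sketch with hypothetical data:

import numpy as np
from sklearn.neighbors import KernelDensity

reads = np.array([3., 1., 4., 1., 5., 9., 2., 6.])  # hypothetical 1-D data
kde = KernelDensity(kernel='gaussian', bandwidth=1.0)
kde.fit(reads[:, np.newaxis])  # shape (n, 1), not (n,)
log_pdf = kde.score_samples(np.linspace(0, 9, 10)[:, np.newaxis])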
Example 4
Project: fexum   Author: KDD-OpenSource   File: tasks.py    MIT License
def calculate_densities(target_feature_id, feature_id):
    feature = Feature.objects.get(pk=feature_id)
    target_feature = Feature.objects.get(pk=target_feature_id)

    df = _get_dataframe(feature.dataset.id)
    target_col = df[target_feature.name]
    categories = target_feature.categories

    def calc_density(category):
        kde = KernelDensity(kernel='gaussian', bandwidth=0.75)
        X = df[target_col == category][feature.name]
        # Fitting requires expanding dimensions
        X = np.expand_dims(X, axis=1)
        kde.fit(X)
        # We'd like to sample 100 values
        X_plot = np.linspace(feature.min, feature.max, 100)
        # We need the last dimension again
        X_plot = np.expand_dims(X_plot, axis=1)
        log_dens = kde.score_samples(X_plot)
        return np.exp(log_dens).tolist()

    return [{'target_class': category, 'density_values': calc_density(category)} for category in categories] 
Example 5
Project: sharp   Author: tfiers   File: data_summary.py    GNU General Public License v3.0
def work(self):
        pos = self.reference_segs_all.center
        sig_length = self.reference_channel_full.duration
        kde = KernelDensity(bandwidth=4)
        kde.fit(as_data_matrix(pos))
        fig, ax = subplots(figsize=paperfig(1.2, 0.3))
        t = linspace(0, sig_length, num=10000)
        log_density = kde.score_samples(as_data_matrix(t))
        density_normalized = exp(log_density)
        density = density_normalized * len(pos)
        t_min = t / 60
        ax.plot(t_min, density)
        ax.fill_between(t_min, density, alpha=0.3)
        ax.set_xlabel("Time (min)")
        f = 2
        add_scalebar(ax, "v", f, f"{f} Hz", pos_along=0.05, pos_across=0.03)
        ax.set_yticks([])
        fig.tight_layout()
        self.output().write(fig) 
Example 6
Project: Weiss   Author: WangWenjun559   File: test_kde.py    Apache License 2.0
def test_kernel_density(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_features)

    for kernel in ['gaussian', 'tophat', 'epanechnikov',
                   'exponential', 'linear', 'cosine']:
        for bandwidth in [0.01, 0.1, 1]:
            dens_true = compute_kernel_slow(Y, X, kernel, bandwidth)

            def check_results(kernel, bandwidth, atol, rtol):
                kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
                                    atol=atol, rtol=rtol)
                log_dens = kde.fit(X).score_samples(Y)
                assert_allclose(np.exp(log_dens), dens_true,
                                atol=atol, rtol=max(1E-7, rtol))
                assert_allclose(np.exp(kde.score(Y)),
                                np.prod(dens_true),
                                atol=atol, rtol=max(1E-7, rtol))

            for rtol in [0, 1E-5]:
                for atol in [1E-6, 1E-2]:
                    for breadth_first in (True, False):
                        yield (check_results, kernel, bandwidth, atol, rtol) 
Example 7
Project: Weiss   Author: WangWenjun559   File: test_kde.py    Apache License 2.0
def test_kde_algorithm_metric_choice():
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)    # 2 features required for haversine dist.
    Y = rng.randn(10, 2)

    for algorithm in ['auto', 'ball_tree', 'kd_tree']:
        for metric in ['euclidean', 'minkowski', 'manhattan',
                       'chebyshev', 'haversine']:
            if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
                assert_raises(ValueError, KernelDensity,
                              algorithm=algorithm, metric=metric)
            else:
                kde = KernelDensity(algorithm=algorithm, metric=metric)
                kde.fit(X)
                y_dens = kde.score_samples(Y)
                assert_equal(y_dens.shape, Y.shape[:1]) 
Example 8
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_kde.py    Apache License 2.0
def test_kde_algorithm_metric_choice():
    # Smoke test for various metrics and algorithms
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)    # 2 features required for haversine dist.
    Y = rng.randn(10, 2)

    for algorithm in ['auto', 'ball_tree', 'kd_tree']:
        for metric in ['euclidean', 'minkowski', 'manhattan',
                       'chebyshev', 'haversine']:
            if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
                assert_raises(ValueError, KernelDensity,
                              algorithm=algorithm, metric=metric)
            else:
                kde = KernelDensity(algorithm=algorithm, metric=metric)
                kde.fit(X)
                y_dens = kde.score_samples(Y)
                assert_equal(y_dens.shape, Y.shape[:1]) 
Example 9
Project: plotnine   Author: has2k1   File: density.py    GNU General Public License v2.0
def kde_sklearn(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scikit-learn

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde_skl = KernelDensity(**kwargs)
    kde_skl.fit(data)
    # score_samples() returns the log-likelihood of the samples
    log_pdf = kde_skl.score_samples(grid)
    return np.exp(log_pdf) 
Example 10
Project: lenstronomy   Author: sibirrer   File: kde_likelihood.py    MIT License
def __init__(self, D_d_sample, D_delta_t_sample, kde_type='scipy_gaussian', bandwidth=1):
        """

        :param D_d_sample: 1-d numpy array of angular diameter distances to the lens plane
        :param D_delta_t_sample: 1-d numpy array of time-delay distances
        :param kde_type: string; the kernel to use. Valid kernels are
            'scipy_gaussian' or
            ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine'].
            Default is 'scipy_gaussian'.
        :param bandwidth: width of kernel (in same units as the angular diameter quantities)
        """
        values = np.vstack([D_d_sample, D_delta_t_sample])
        if kde_type == 'scipy_gaussian':
            self._PDF_kernel = stats.gaussian_kde(values)
        else:
            from sklearn.neighbors import KernelDensity
            self._kde = KernelDensity(bandwidth=bandwidth, kernel=kde_type)
            self._kde.fit(values.T)
        self._kde_type = kde_type 
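Example 10 switches between scipy.stats.gaussian_kde and sklearn's KernelDensity, and the two APIs orient their inputs differently: scipy expects (n_features, n_samples), sklearn the transpose. A short sketch of the difference (toy data, not from the project):

import numpy as np
from scipy import stats
from sklearn.neighbors import KernelDensity

values = np.random.RandomState(0).randn(2, 500)  # 2 variables, 500 samples

scipy_kde = stats.gaussian_kde(values)                 # expects (n_features, n_samples)
skl_kde = KernelDensity(bandwidth=0.5).fit(values.T)   # expects (n_samples, n_features)

pts = np.array([[0.0, 0.0], [1.0, -1.0]])
scipy_pdf = scipy_kde(pts.T)                    # evaluates the density directly
skl_pdf = np.exp(skl_kde.score_samples(pts))    # exp of the log-density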
Example 11
Project: ICML-2015   Author: Philip-Bachman   File: utils.py    MIT License
def plot_kde_histogram(X, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X. Assume data is univariate.
    """
    import matplotlib.pyplot as plt
    X_samp = X.ravel()[:,np.newaxis]
    X_min = np.min(X_samp)
    X_max = np.max(X_samp)
    X_range = X_max - X_min
    sigma = X_range / float(bins)
    plot_min = X_min - (X_range/3.0)
    plot_max = X_max + (X_range/3.0)
    plot_X = np.linspace(plot_min, plot_max, 1000)[:,np.newaxis]
    # make a kernel density estimator for the data in X
    kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
    # make a figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(plot_X, np.exp(kde.score_samples(plot_X)))
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w', \
        orientation='portrait', papertype=None, format=None, \
        transparent=False, bbox_inches=None, pad_inches=0.1, \
        frameon=None)
    plt.close(fig)
    return 
Example 12
Project: openml-pimp   Author: janvanrijn   File: priors.py    BSD 3-Clause "New" or "Revised" License
def __init__(self, hyperparameter, param_name, data, oob_strategy='resample', bandwith=0.4):
        if oob_strategy not in ['resample', 'round', 'ignore']:
            raise ValueError()
        self.oob_strategy = oob_strategy
        self.param_name = param_name
        self.hyperparameter = hyperparameter
        reshaped = np.reshape(data, (len(data), 1))

        if self.hyperparameter.log:
            if isinstance(self.hyperparameter, UniformIntegerHyperparameter):
                # self.probabilities = {val: self.distrib.pdf(np.log2(val)) for val in range(self.hyperparameter.lower, self.hyperparameter.upper)}
                raise ValueError('Log Integer hyperparameter not supported: %s' %param_name)
            # self.distrib = gaussian_kde(np.log2(data))
            # self.distrib = KernelDensity(kernel='gaussian').fit(np.log2(np.reshape(data, (len(data), 1))))
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(np.log2(reshaped))
        else:
            # self.distrib = gaussian_kde(data)
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(reshaped)
Example 13
Project: design_embeddings_jmd_2016   Author: IDEALLab   File: intrinsic_dim.py    MIT License
def mide(X, n_neighbors=None, verbose=0):
    ''' Manifold intrinsic dimension estimator 
    Returns both global intrinsic dimensionality and local intrinsic dimensionality
    '''
    
    # Initial guess
    if n_neighbors is None:
        k_min, k_max = get_k_range(X)
        n_neighbors = (k_min + k_max) // 2  # kneighbors expects an integer count
    neigh = NearestNeighbors().fit(X)
    dist, nbrs = neigh.kneighbors(n_neighbors=n_neighbors, return_distance=True)
    local_dims = lmse(X, nbrs, verbose=verbose)
    
    if verbose:
        visualize_graph(X, nbrs)
#        plt.figure()
#        plt.plot(local_dims, 'o')
#        plt.title('Local intrinsic dimensions')
#        plt.xlabel('Samples')
#        plt.ylabel('Local ID')
#        plt.ylim(1,4)
#        plt.show()
    
    # Smoothing, this can correct the wrong local dimension estimations
    local_dims = np.array(local_dims)
    X_dims = np.concatenate((X, local_dims.reshape(-1,1)), axis=1)
    b = np.mean(dist[:,-1]) * 5
    kde = KernelDensity(kernel='epanechnikov', bandwidth=b).fit(X_dims)
    for i in range(len(local_dims)):
        Xi = np.concatenate((np.repeat(X[i].reshape(1,-1), len(np.unique(local_dims)), axis=0), 
                             np.unique(local_dims).reshape(-1,1)), axis=1)
        kde_scores = kde.score_samples(Xi)
        local_dims[i] = Xi[np.argmax(kde_scores), -1]
        
    if verbose == 2:
        print(local_dims)
        
    intr_dim = int(round(np.mean(local_dims), 0))
    
    return intr_dim, local_dims 
Example 14
Project: deepSVDD   Author: GSRS   File: kde.py    MIT License
def initialize_kde(self, **kwargs):

        self.kde = KernelDensity(kernel=self.kernel, **kwargs)
        self.bandwidth = self.kde.bandwidth 
Example 15
Project: deepSVDD   Author: GSRS   File: kde.py    MIT License
def train(self, bandwidth_GridSearchCV=True):

        if self.data._X_train.ndim > 2:
            X_train_shape = self.data._X_train.shape
            X_train = self.data._X_train.reshape(X_train_shape[0], -1)
        else:
            X_train = self.data._X_train

        print("Starting training...")
        self.start_clock()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            print("Using GridSearchCV for bandwidth selection...")

            d = X_train.shape[1]
            grid = np.logspace(-9, 20, num=30, base=2)
            params = {'bandwidth': (d / (2.0 * grid)) ** 0.5}

            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params,
                                     n_jobs=10, cv=20, verbose=1)
            hyper_kde.fit(X_train)

            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            self.kde = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing
            # the numerical error
            if self.kernel == 'exponential':
                bandwidth = np.max(pairwise_distances(X_train)) ** 2
                self.kde = KernelDensity(kernel=self.kernel,
                                         bandwidth=bandwidth)

            self.kde.fit(X_train)

        self.stop_clock()
        self.train_time = self.clocked 
Example 16
Project: adversarial-policies   Author: HumanCompatibleAI   File: fit_density.py    MIT License
def pca_kde():
    model_class = PCAPreDensity
    model_kwargs = {"density_class": KernelDensity}
    _ = locals()  # quieten flake8 unused variable warning
    del _ 
Example 17
Project: adversarial-policies   Author: HumanCompatibleAI   File: fit_density.py    MIT License
def kde():
    model_class = KernelDensity
    _ = locals()  # quieten flake8 unused variable warning
    del _ 
Example 18
Project: PlotSummary   Author: MartinPaulEve   File: text.py    MIT License
def kde(self, term, bandwidth=2000, samples=1000, kernel='gaussian'):

        """
        Estimate the kernel density of the instances of term in the text.

        Args:
            term (str): A stemmed term.
            bandwidth (int): The kernel bandwidth.
            samples (int): The number of evenly-spaced sample points.
            kernel (str): The kernel function.

        Returns:
            np.array: The density estimate.
        """

        # Get the offsets of the term instances.
        try:
            terms = np.array(self.terms[term])[:, np.newaxis]
        except KeyError:
            return 0

        # Fit the density estimator on the terms.
        kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(terms)

        # Score an evenly-spaced array of samples.
        x_axis = np.linspace(0, len(self.tokens), samples)[:, np.newaxis]
        scores = kde.score_samples(x_axis)

        # Scale the scores to integrate to 1.
        return np.exp(scores) * (len(self.tokens) / samples) 
Example 19
Project: imitation   Author: HumanCompatibleAI   File: density_baselines.py    MIT License
def _fit_single_density(self, flat_transitions):
    # This bandwidth was chosen to make sense with standardised inputs that
    # have unit variance in each component. There might be a better way to
    # choose it automatically.
    density_model = KernelDensity(kernel=self.kernel,
                                  bandwidth=self.kernel_bandwidth)
    density_model.fit(flat_transitions)
    return density_model 
Example 20
Project: rainfall-teleconnections   Author: niklasboers   File: link_bundles_null_model.py    GNU General Public License v3.0
def shperical_kde(values, xy, bw_opt):
   kde = KernelDensity(bandwidth=bw_opt, metric='haversine', kernel='gaussian', algorithm='ball_tree')
   kde.fit(values)
   datss = np.exp(kde.score_samples(xy))
   return datss 
Example 21
Project: rainfall-teleconnections   Author: niklasboers   File: link_bundles.py    GNU General Public License v3.0
def shperical_kde(values, xy, bw_opt):
   kde = KernelDensity(bandwidth=bw_opt, metric='haversine', kernel='gaussian', algorithm='ball_tree')
   kde.fit(values)
   datss = np.exp(kde.score_samples(xy))
   return datss 
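Examples 20 and 21 estimate a density on the sphere by combining the haversine metric with a ball tree. Note that scikit-learn's haversine distance expects coordinates as (latitude, longitude) in radians; a hedged sketch with hypothetical points:

import numpy as np
from sklearn.neighbors import KernelDensity

# hypothetical event locations: columns (lat, lon), converted to radians
latlon_deg = np.array([[48.1, 11.6], [52.5, 13.4], [45.5, -73.6]])
latlon_rad = np.radians(latlon_deg)

kde = KernelDensity(bandwidth=0.05, metric='haversine',
                    kernel='gaussian', algorithm='ball_tree')
kde.fit(latlon_rad)
log_density = kde.score_samples(np.radians([[50.0, 12.0]]))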
Example 22
Project: Deep-SVDD   Author: ErikKratzCth   File: kde.py    MIT License
def initialize_kde(self, **kwargs):

        self.kde = KernelDensity(kernel=self.kernel, **kwargs)
        self.bandwidth = self.kde.bandwidth 
Example 23
Project: Deep-SVDD   Author: ErikKratzCth   File: kde.py    MIT License
def train(self, bandwidth_GridSearchCV=True):

        if self.data._X_train.ndim > 2:
            X_train_shape = self.data._X_train.shape
            X_train = self.data._X_train.reshape(X_train_shape[0], -1)
        else:
            X_train = self.data._X_train

        print("Starting training...")
        self.start_clock()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            print("Using GridSearchCV for bandwidth selection...")

            # params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
            params = {'bandwidth': np.logspace(-4.5, 5, num=20, base=2)}

            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params, n_jobs=-1, cv=5, verbose=0)
            hyper_kde.fit(X_train)

            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            self.kde = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing
            # the numerical error
            if self.kernel == 'exponential':
                bandwidth = np.max(pairwise_distances(X_train)) ** 2
                self.kde = KernelDensity(kernel=self.kernel,
                                         bandwidth=bandwidth)

            self.kde.fit(X_train)

        self.stop_clock()
        self.train_time = self.clocked 
Example 24
Project: munk   Author: lrgr   File: plot_dissims.py    MIT License
def plot_and_save(scores_and_labels, xlabel, output,
                  xmin=0.0,
                  xmax=0.6,
                  smoothness=20.,
                  font_size=12,
                  line_width=2):

    # create and save plot
    plt.figure()

    # create kernel density estimator
    kde = neighbors.KernelDensity(kernel='gaussian', bandwidth = xmax / smoothness)
    # need to add another dimension as required by sklearn
    # arrays passed to kde must be 2-dimensional
    X_plot = np.reshape(np.linspace(xmin, xmax, 500), (-1, 1))
    styles = ['-', '--', '-.', ':']
    for i, (xs, label) in enumerate(scores_and_labels):
        scores = np.ravel(xs) if len(xs) < 1e5 else np.random.choice(np.ravel(xs), int(1e5))
        kde.fit(np.reshape(scores, (-1, 1)))
        densities = kde.score_samples(X_plot)
        plt.plot(X_plot[:,0], np.exp(densities), lw = line_width,
                 label = label, ls=styles[i % len(styles)])
    plt.ylabel('Density', size = font_size)
    plt.xlabel(xlabel, size = font_size)
    plt.legend(loc='best', fontsize = font_size)
    plt.tight_layout()
    plt.savefig(output) 
Example 25
Project: Sequential-Generation   Author: Philip-Bachman   File: utils.py    MIT License
def plot_kde_histogram(X, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X. Assume data is univariate.
    """
    import matplotlib.pyplot as plt
    X = X.ravel()
    np.random.shuffle(X)
    X = X[0:min(X.shape[0], 1000000)]
    X_samp = X[:,np.newaxis]
    X_min = np.min(X_samp)
    X_max = np.max(X_samp)
    X_range = X_max - X_min
    sigma = X_range / float(bins)
    plot_min = X_min - (X_range/3.0)
    plot_max = X_max + (X_range/3.0)
    plot_X = np.linspace(plot_min, plot_max, 1000)[:,np.newaxis]
    # make a kernel density estimator for the data in X
    kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
    # make a figure
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(plot_X, np.exp(kde.score_samples(plot_X)))
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w', \
        orientation='portrait', papertype=None, format=None, \
        transparent=False, bbox_inches=None, pad_inches=0.1, \
        frameon=None)
    plt.close(fig)
    return 
Example 26
Project: Sequential-Generation   Author: Philip-Bachman   File: utils.py    MIT License
def plot_kde_histogram2(X1, X2, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X1/X2. Assume data is 1D.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for (X, style) in [(X1, '-'), (X2, '--')]:
        X_samp = X.ravel()[:,np.newaxis]
        X_min = np.min(X_samp)
        X_max = np.max(X_samp)
        X_range = X_max - X_min
        sigma = X_range / float(bins)
        plot_min = X_min - (X_range/3.0)
        plot_max = X_max + (X_range/3.0)
        plot_X = np.linspace(plot_min, plot_max, 1000)[:,np.newaxis]
        # make a kernel density estimator for the data in X
        kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style)
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w', \
        orientation='portrait', papertype=None, format=None, \
        transparent=False, bbox_inches=None, pad_inches=0.1, \
        frameon=None)
    plt.close(fig)
    return 
Example 27
Project: NFLWin   Author: AndrewRook   File: model.py    MIT License
def _compute_predicted_percentages(actual_results, predicted_win_probabilities):
        """Compute the sample percentages from a validation data set.
        """
        kde_offense_won = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(
            (predicted_win_probabilities[(actual_results == 1)])[:, np.newaxis])
        kde_total = KernelDensity(kernel='gaussian', bandwidth=0.01).fit(
            predicted_win_probabilities[:, np.newaxis])
        sample_probabilities = np.linspace(0.01, 0.99, 99)
        number_density_offense_won = np.exp(kde_offense_won.score_samples(sample_probabilities[:, np.newaxis])) * np.sum((actual_results))
        number_density_total = np.exp(kde_total.score_samples(sample_probabilities[:, np.newaxis])) * len(actual_results)
        number_offense_won = number_density_offense_won * np.sum(actual_results) / np.sum(number_density_offense_won)
        number_total = number_density_total * len(actual_results) / np.sum(number_density_total)
        predicted_win_percents = number_offense_won / number_total

        return 100.*sample_probabilities, 100.*predicted_win_percents, number_total 
Example 28
Project: replay_classification   Author: Eden-Kramer-Lab   File: decoders.py    GNU General Public License v3.0
def __init__(
        self, n_place_bins=None, place_bin_size=1,
        replay_speedup_factor=20,
        replay_orders=_DEFAULT_REPLAY_ORDERS,
        time_bin_size=1, confidence_threshold=0.8, movement_std=0.5,
            model=KernelDensity, model_kwargs=_DEFAULT_MULTIUNIT_MODEL_KWARGS):
        super().__init__(n_place_bins, place_bin_size,
                         replay_speedup_factor,
                         replay_orders,
                         time_bin_size, confidence_threshold,
                         movement_std)
        self.model = model
        self.model_kwargs = model_kwargs 
Example 29
Project: global-divergences   Author: jeanfeydy   File: gradient_flow_1D.py    MIT License
def display(x, color, list_save=None):
    kde = KernelDensity(kernel='gaussian', bandwidth=.005).fit(x.data.cpu().numpy())
    dens = np.exp(kde.score_samples(t_plot))
    dens[0] = 0
    dens[-1] = 0
    plt.fill(t_plot, dens, color=color)
    if list_save is not None :
        list_save.append(dens.ravel()) # We'll save a csv at the end 
Example 30
Project: linear_neuron   Author: uglyboxer   File: test_grid_search.py    MIT License
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42) 
Example 31
Project: sharp   Author: tfiers   File: mountain_stats.py    GNU General Public License v3.0
def plot_density(detector: DetectMountains, ax, time_grid):
    segs_list = detector.output().read()
    # Yeet mountains together over channels
    seg_starts = concatenate([segs.start for segs in segs_list])
    kde = KernelDensity(bandwidth=10)
    kde.fit(as_data_matrix(seg_starts))
    # NB: score_samples returns the log of the density
    density = kde.score_samples(as_data_matrix(time_grid))
    ax.plot(time_grid / 60, density) 
Example 32
Project: sharp   Author: tfiers   File: offline_steps.py    GNU General Public License v3.0
def plot_envelope_dist(self, ax: Axes):
        yrange = self.e_t.span
        sample = choice(self.e_t, 5000)
        kde = KernelDensity(bandwidth=0.02 * yrange)
        kde.fit(as_data_matrix(sample))
        e_dom = linspace(*self.e_t.range, 500)
        density = exp(kde.score_samples(as_data_matrix(e_dom)))
        ax.fill_betweenx(e_dom, density, color=envelope_color)
        ax.axhline(0, color="gray", lw=thin_lw)
        rm = self.reference_maker
        threshold_extent = 1.21
        kwargs = dict(
            xmin=0,
            xmax=threshold_extent,
            color=threshold_color,
            lw=thin_lw,
            clip_on=False,
        )
        ax.axhline(rm.ripple_threshold_high, **kwargs)
        ax.axhline(rm.ripple_threshold_low, **kwargs)
        ax.axhline(rm.envelope_median, linestyle=":", **kwargs)
        ax.set_xticks([])
        ax.set_yticks([])
        add_title(
            ax, "Empirical\ndistribution of $n_t$", envelope_color, x=0.1, y=0.8
        )
        text_kwargs = dict(
            x=threshold_extent + 0.05,
            color=threshold_color,
            transform=ax.get_yaxis_transform(),
            fontsize=0.69 * annotation_text_size,
            va="center",
        )
        ax.text(y=rm.ripple_threshold_high, s="$T_{high}$", **text_kwargs)
        ax.text(y=rm.ripple_threshold_low, s="$T_{low}$", **text_kwargs)
        ax.text(y=rm.envelope_median, s="Median", **text_kwargs) 
Example 33
Project: Weiss   Author: WangWenjun559   File: test_grid_search.py    Apache License 2.0
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42) 
Example 34
Project: Weiss   Author: WangWenjun559   File: test_kde.py    Apache License 2.0
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1)) 
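As the test above documents, KernelDensity.sample() is only implemented for the 'gaussian' and 'tophat' kernels and raises NotImplementedError for the others. A minimal usage sketch:

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.random.RandomState(0).randn(200, 2)
kde = KernelDensity(kernel='gaussian', bandwidth=0.3).fit(X)
new_points = kde.sample(n_samples=50, random_state=0)  # shape (50, 2)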
Example 35
Project: Weiss   Author: WangWenjun559   File: test_kde.py    Apache License 2.0
def test_kde_badargs():
    assert_raises(ValueError, KernelDensity,
                  algorithm='blah')
    assert_raises(ValueError, KernelDensity,
                  bandwidth=0)
    assert_raises(ValueError, KernelDensity,
                  kernel='blah')
    assert_raises(ValueError, KernelDensity,
                  metric='blah')
    assert_raises(ValueError, KernelDensity,
                  algorithm='kd_tree', metric='blah') 
Example 36
Project: policosm   Author: ComplexCity   File: getKernelDensityEstimation.py    MIT License
def getKernelDensityEstimation(nodes, metric='euclidean', metric_params=None, bbox=None, bandwidth=0.002, optimizeBandwidth=False, bwmin=0.0001, bwmax=0.01, crossValidation=20):
	lon = []
	lat = []
	for nlon,nlat in nodes:
		lon.append(nlon)
		lat.append(nlat)
	lon = np.array(lon)
	lat = np.array(lat)

	if bbox is None:
		xmin, xmax = min(lon), max(lon)
		ymin, ymax = min(lat), max(lat)
		bbox = [xmin, xmax, ymin, ymax]
	else:
		xmin, ymin, xmax, ymax = bbox[0],bbox[1],bbox[2],bbox[3]
		bbox = [xmin, xmax, ymin, ymax]

	x, y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
	positions = np.vstack([x.ravel(), y.ravel()])
	values = np.vstack([lon, lat])

	if optimizeBandwidth:
		grid = GridSearchCV(KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree'), {'bandwidth': np.linspace(bwmin, bwmax, 30)}, cv=crossValidation) # 20-fold cross-validation
		# values has shape (2, n_samples); transpose for sklearn
		grid.fit(values.T)

		bandwidth = grid.best_params_['bandwidth']
		kernel = grid.best_estimator_
	else:
		kernel = KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree', bandwidth=bandwidth)
		kernel.fit(values.T)
	
	return kernel, positions, x, y, bbox, bandwidth 
Example 37
Project: policosm   Author: ComplexCity   File: getKernelDensityEstimationForDifferentGrids.py    MIT License
def getKernelDensityEstimationForDifferentGrids(nodes, metric='euclidean', metric_params=None, bandwidth=0.002,
                                                optimizeBandwidth=False, bwmin=0.0001, bwmax=0.01, crossValidation=20,
                                                grid_sizes=None):
    lon = []
    lat = []
    for nlon, nlat in nodes:
        lon.append(nlon)
        lat.append(nlat)
    lon = np.array(lon)
    lat = np.array(lat)

    # bbox automatically calculated
    xmin, xmax = min(lon), max(lon)
    ymin, ymax = min(lat), max(lat)
    bbox = [xmin, xmax, ymin, ymax]

    # grid size every 100m, 250m, 500m
    # list of x and y for each grid sizes
    grids = grid_sizes if grid_sizes is not None else [100]
    xy = [np.mgrid[xmin:xmax:i, ymin:ymax:i] for i in grids]
    # list of grids
    positions = [np.vstack([x.ravel(), y.ravel()]) for x, y in xy]

    # build single D matrix for grid (positions) and data (values)
    values = np.vstack([lon, lat])

    if optimizeBandwidth:
        grid = GridSearchCV(
            KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree'),
            {'bandwidth': np.linspace(bwmin, bwmax, 30)}, cv=crossValidation)  # 20-fold cross-validation
        # values has shape (2, n_samples); transpose for sklearn
        grid.fit(values.T)

        bandwidth = grid.best_params_['bandwidth']
        kernel = grid.best_estimator_
    else:
        kernel = KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree',
                               bandwidth=bandwidth)
        kernel.fit(values.T)

    return kernel, positions, xy, bbox, bandwidth 
Example 38
Project: policosm   Author: ComplexCity   File: testGetKernelDensityEstimation.py    MIT License
def getKernelDensityEstimation(nodes, metric='euclidean', metric_params=None, bbox=None, bandwidth=0.002, optimizeBandwidth=False, bwmin=0.0001, bwmax=0.01, crossValidation=20):
	lon = []
	lat = []
	for nlon,nlat in nodes:
		lon.append(nlon)
		lat.append(nlat)
	lon = np.array(lon)
	lat = np.array(lat)

	if bbox is None:
		xmin, xmax = min(lon), max(lon)
		ymin, ymax = min(lat), max(lat)
		bbox = [xmin, xmax, ymin, ymax]
	else:
		xmin, ymin, xmax, ymax = bbox[0],bbox[1],bbox[2],bbox[3]
		bbox = [xmin, xmax, ymin, ymax]

	x, y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
	positions = np.vstack([x.ravel(), y.ravel()])
	values = np.vstack([lon, lat])

	if optimizeBandwidth:
		grid = GridSearchCV(KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree'), {'bandwidth': np.linspace(bwmin, bwmax, 30)}, cv=crossValidation) # 20-fold cross-validation
		# values has shape (2, n_samples); transpose for sklearn
		grid.fit(values.T)

		bandwidth = grid.best_params_['bandwidth']
		kernel = grid.best_estimator_
	else:
		kernel = KernelDensity(kernel='gaussian', metric=metric, metric_params=metric_params, algorithm='ball_tree', bandwidth=bandwidth)
		kernel.fit(values.T)
	
	return kernel, positions, x, y, bbox, bandwidth 
Example 39
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_search.py    Apache License 2.0
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42) 
Example 40
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_grid_search.py    Apache License 2.0
def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = GridSearchCV(KernelDensity(),
                          param_grid=dict(bandwidth=[.01, .1, 1]),
                          scoring=custom_scoring)
    search.fit(X)
    assert_equal(search.best_params_['bandwidth'], .1)
    assert_equal(search.best_score_, 42) 
Example 41
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_kde.py    Apache License 2.0
def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
                        atol=atol, rtol=rtol)
    log_dens = kde.fit(X).score_samples(Y)
    assert_allclose(np.exp(log_dens), dens_true,
                    atol=atol, rtol=max(1E-7, rtol))
    assert_allclose(np.exp(kde.score(Y)),
                    np.prod(dens_true),
                    atol=atol, rtol=max(1E-7, rtol)) 
Example 42
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_kde.py    Apache License 2.0
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1)) 
Example 43
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_kde.py    Apache License 2.0
def test_kde_pipeline_gridsearch():
    # test that kde plays nice in pipelines and grid-searches
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    pipe1 = make_pipeline(StandardScaler(with_mean=False, with_std=False),
                          KernelDensity(kernel="gaussian"))
    params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10])
    search = GridSearchCV(pipe1, param_grid=params, cv=5)
    search.fit(X)
    assert_equal(search.best_params_['kerneldensity__bandwidth'], .1) 
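The grid searches in these examples can omit an explicit scoring argument because GridSearchCV falls back to the estimator's own score() method, which for KernelDensity is the total log-likelihood of the held-out fold. A minimal sketch of plain cross-validated bandwidth selection:

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

X = np.random.RandomState(0).randn(200, 1)
search = GridSearchCV(KernelDensity(kernel='gaussian'),
                      {'bandwidth': np.logspace(-1, 1, 20)}, cv=5)
search.fit(X)
best_kde = search.best_estimator_  # refit on all of X with the best bandwidth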
Example 44
Project: DGPs_with_IWVI   Author: hughsalimbeni   File: demo.py    Apache License 2.0
def plot_density(model, path):
    N_samples = 10000

    samples = model.predict_y_samples(Xs, N_samples, session=sess)[:, :, 0]
    # objective = np.average([model.compute_log_likelihood() for _ in range(1000)])

    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.scatter(X, Y, marker='.', color='C1')
    levels = np.linspace(-1, 2, 200)
    ax.set_ylim(min(levels), max(levels))
    ax.set_xlim(min(Xs), max(Xs))


    cs = np.zeros((len(Xs), len(levels)))
    for i, Ss in enumerate(samples.T):
        bandwidth = 1.06 * np.std(Ss) * len(Ss) ** (-1. / 5)  # Silverman's (1986) rule of thumb.
        kde = KernelDensity(bandwidth=float(bandwidth))

        kde.fit(Ss.reshape(-1, 1))
        for j, level in enumerate(levels):
            cs[i, j] = kde.score(np.array(level).reshape(1, 1))
    ax.pcolormesh(Xs.flatten(), levels, np.exp(cs.T), cmap='Blues_r')  # , alpha=0.1)
    ax.scatter(X, Y, marker='.', color='C1')

    plt.savefig(os.path.join(path, 'density_{:03d}.png'.format(k)))
    plt.close() 
Example 45
Project: Emergence   Author: LennonLab   File: SAD-Models.py    MIT License
def CV_KDE(oneD_array, select_bandwidth = True):
    # remove NaNs
    oneD_array = oneD_array[np.logical_not(np.isnan(oneD_array))]
    grid = GridSearchCV(KernelDensity(),
                    {'bandwidth': np.logspace(0.1, 5.0, 30)},
                    cv=20) # 20-fold cross-validation
    grid.fit(oneD_array[:, None])
    x_grid = np.linspace(np.amin(oneD_array), np.amax(oneD_array), 10000)
    kde = grid.best_estimator_
    pdf = np.exp(kde.score_samples(x_grid[:, None]))
    # returns grid for x-axis, pdf, and bandwidth
    return_tuple = (x_grid, pdf, kde.bandwidth)
    return return_tuple 
Example 46
Project: airfoil-opt-gan   Author: IDEALLab   File: utils.py    MIT License
def optimize_kde(X):
    # use grid search cross-validation to optimize the bandwidth
    params = {'bandwidth': np.logspace(-3, 1, 20)}
    grid = GridSearchCV(KernelDensity(), params, n_jobs=8, cv=5, verbose=1)
    grid.fit(X)
    
    print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
    
    # use the best estimator to compute the kernel density estimate
    kde = grid.best_estimator_
    
    return kde 
Example 47
Project: kenchi   Author: HazureChi   File: statistical.py    BSD 3-Clause "New" or "Revised" License
def _fit(self, X):
        self.estimator_   = KernelDensity(
            algorithm     = self.algorithm,
            atol          = self.atol,
            bandwidth     = self.bandwidth,
            breadth_first = self.breadth_first,
            kernel        = self.kernel,
            leaf_size     = self.leaf_size,
            metric        = self.metric,
            rtol          = self.rtol,
            metric_params = self.metric_params
        ).fit(X)

        return self 
Example 48
Project: ICML-2015   Author: Philip-Bachman   File: utils.py    MIT License
def plot_kde_histogram2(X1, X2, f_name, bins=25):
    """
    Plot KDE-smoothed histogram of the data in X1/X2. Assume data is 1D.
    """
    import matplotlib.pyplot as plt
    # make a figure and configure an axis
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for (X, style) in [(X1, '-'), (X2, '--')]:
        X_samp = X.ravel()[:,np.newaxis]
        X_min = np.min(X_samp)
        X_max = np.max(X_samp)
        X_range = X_max - X_min
        sigma = X_range / float(bins)
        plot_min = X_min - (X_range/3.0)
        plot_max = X_max + (X_range/3.0)
        plot_X = np.linspace(plot_min, plot_max, 1000)[:,np.newaxis]
        # make a kernel density estimator for the data in X
        kde = KernelDensity(kernel='gaussian', bandwidth=sigma).fit(X_samp)
        ax.plot(plot_X, np.exp(kde.score_samples(plot_X)), linestyle=style)
    fig.savefig(f_name, dpi=None, facecolor='w', edgecolor='w', \
        orientation='portrait', papertype=None, format=None, \
        transparent=False, bbox_inches=None, pad_inches=0.1, \
        frameon=None)
    plt.close(fig)
    return 
Example 49
Project: gains   Author: wesleybeckner   File: adaptive.py    MIT License
def gaussian_pdf(column):
    x = column.values
    x_d = np.linspace(min(x), max(x), 10000)

    # instantiate and fit the KDE model
    kde = KernelDensity(bandwidth=0.01, kernel='gaussian')
    kde.fit(x[:, None])

    # score_samples returns the log of the probability density
    return kde.score_samples(x_d[:, None]), x_d 
Example 50
Project: kaggle-dstl-satellite-imagery-feature-detection   Author: u1234x1234   File: b3_data_iter.py    Apache License 2.0
def __init__(self, masks):
        n_class = 10
        self.maps_with_class = [[], [], [], [], [], [], [], [], [], []]
        self.kde_samplers = []
        self.class_probs = np.ones(n_class) / n_class
#        self.class_probs = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.5])
        self.mask_size = None
        ts = time.time()
        for mask_i, mask in enumerate(masks):
            assert mask.shape[2] == n_class
            if not self.mask_size:
                self.mask_size = mask.shape[1]
            samplers = []
            for class_i in range(n_class):
                X = np.nonzero(mask[:, :, class_i])
                X = np.stack(X, axis=1)

#                np.random.shuffle(X)
#                X = X[:50000]

                if not X.size:
                    samplers.append(None)
                else:
                    self.maps_with_class[class_i].append(mask_i)
                    sampler = neighbors.KernelDensity(self.mask_size * 0.02).fit(X)
                    samplers.append(sampler)

            assert len(samplers) == n_class
            self.kde_samplers.append(samplers)
        print('sampler init time: {}'.format(time.time() - ts)) 
Example 51
Project: Deep-SAD-PyTorch   Author: lukasruff   File: kde.py    MIT License
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0,
              bandwidth_GridSearchCV: bool = True):
        """Trains the Kernel Density Estimation model on the training data."""
        logger = logging.getLogger()

        # do not drop last batch for non-SGD optimization shallow_ssad
        train_loader = DataLoader(dataset=dataset.train_set, batch_size=128, shuffle=True,
                                  num_workers=n_jobs_dataloader, drop_last=False)

        # Get data from loader
        X = ()
        for data in train_loader:
            inputs, _, _, _ = data
            inputs = inputs.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(inputs)  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(inputs.size(0), -1)  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(),)
        X = np.concatenate(X)

        # Training
        logger.info('Starting training...')
        start_time = time.time()

        if bandwidth_GridSearchCV:
            # use grid search cross-validation to select bandwidth
            logger.info('Using GridSearchCV for bandwidth selection...')
            params = {'bandwidth': np.logspace(0.5, 5, num=10, base=2)}
            hyper_kde = GridSearchCV(KernelDensity(kernel=self.kernel), params, n_jobs=self.n_jobs, cv=5, verbose=0)
            hyper_kde.fit(X)
            self.bandwidth = hyper_kde.best_estimator_.bandwidth
            logger.info('Best bandwidth: {:.8f}'.format(self.bandwidth))
            self.model = hyper_kde.best_estimator_
        else:
            # if exponential kernel, re-initialize kde with bandwidth minimizing the numerical error
            if self.kernel == 'exponential':
                self.bandwidth = np.max(pairwise_distances(X)) ** 2
                self.model = KernelDensity(kernel=self.kernel, bandwidth=self.bandwidth)

            self.model.fit(X)

        train_time = time.time() - start_time
        self.results['train_time'] = train_time

        logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
        logger.info('Finished training.') 
Example 52
Project: WeightedLoss_Convolutional_Pose_Machines_PyTorch   Author: JindongJiang   File: cpm_data.py    MIT License
def __init__(self, root_dir, transform=None, phase_train=True,
                 weighted_loss=False, bandwidth=50):
        self.scaled_h = 368
        self.scaled_w = 368
        self.map_h = 45
        self.map_w = 45
        self.guassian_sigma = 21
        self.num_keypoints = 14
        self.num_train = 11000
        global lms, imagefiles, weight
        if lms is None or imagefiles is None or weight is None:
            mat_lsp = scipy.io.loadmat(os.path.join(root_dir, 'lsp_dataset/joints.mat'),
                                       squeeze_me=True, struct_as_record=False)['joints']
            mat_lspet = scipy.io.loadmat(os.path.join(root_dir, 'lspet_dataset/joints.mat'),
                                         squeeze_me=True, struct_as_record=False)['joints']
            image_lsp = np.array(glob.glob(os.path.join(root_dir,
                                                        'lsp_dataset/images/*.jpg'), recursive=True))
            image_lspet = np.array(glob.glob(os.path.join(root_dir,
                                                          'lspet_dataset/images/*.jpg'), recursive=True))
            image_nums_lsp = np.array([float(s.rsplit('/')[-1][2:-4]) for s in image_lsp])
            image_nums_lspet = np.array([float(s.rsplit('/')[-1][2:-4]) for s in image_lspet])
            sorted_image_lsp = image_lsp[np.argsort(image_nums_lsp)]
            sorted_image_lspet = image_lspet[np.argsort(image_nums_lspet)]

            self.lms = np.append(mat_lspet.transpose([2, 1, 0])[:, :2, :],
                                 # only the x, y coords, not the "block or not" channel
                                 mat_lsp.transpose([2, 0, 1])[:, :2, :],
                                 axis=0)
            self.imagefiles = np.append(sorted_image_lspet, sorted_image_lsp)
            imgs_shape = []
            for img_file in self.imagefiles:
                imgs_shape.append(Image.open(img_file).size)
            lms_scaled = self.lms / np.array(imgs_shape)[:, :, np.newaxis]
            self.weight = np.logical_and(lms_scaled > 0, lms_scaled <= 1).astype(np.float32)
            self.weight = self.weight[:, 0, :] * self.weight[:, 1, :]
            self.weight = np.append(self.weight, np.ones((self.weight.shape[0], 1)), axis=1)
            self.weight = self.weight[:, np.newaxis, :].repeat(6, 1)
            if weighted_loss and phase_train:
                datas = lms_scaled[:self.num_train].reshape(self.num_train, -1)
                datas[datas < 0] = 0
                datas[datas > 1] = 0
                datas_pca = PCA(n_components=3).fit_transform(datas)
                kde = KernelDensity(bandwidth=bandwidth).fit(datas_pca)
                p = np.exp(kde.score_samples(datas_pca))
                p_median = np.median(p)
                p_weighted = p_median / p
                self.weight[:self.num_train] *= p_weighted[:, np.newaxis, np.newaxis]
            lms = self.lms
            imagefiles = self.imagefiles
            weight = self.weight
        else:
            self.lms = lms
            self.imagefiles = imagefiles
            self.weight = weight

        self.transform = transform
        self.phase_train = phase_train 
Example 53
Project: RQpy   Author: ucbpylegroup   File: _random_sampling.py    GNU General Public License v3.0
def _sample_from_kde(data, xrange, kernel, bw_method, bandwidths, cv, npoints, plot_pdf):
    """
    Helper function for sampling from a kernel density estimate of `data`. Returns a
    function that can be used for inverse transform sampling. See `sample_from_data`
    for detailed documentation.

    """

    if xrange is not None:
        data = data[rp.inrange(data, xrange[0], xrange[1])]

    ndata = len(data)

    if bw_method == 'scott':
        bandwidth = ndata**(-1 / 5) * np.std(data, ddof=1)
    elif bw_method == 'silverman':
        bandwidth = (ndata * 3 / 4)**(-1 / 5) * np.std(data, ddof=1)
    elif bw_method == 'cv':
        if bandwidths is None:
            bandwidths = np.std(data, ddof=1) ** np.linspace(-1, 1, 100)
        grid = GridSearchCV(KernelDensity(), {'bandwidth': bandwidths}, cv=cv)
        grid.fit(data[:, np.newaxis])
        bandwidth = grid.best_params_['bandwidth']
    elif np.isscalar(bw_method):
        bandwidth = bw_method
    else:
        raise ValueError("Unrecognized input for bw_method.")

    x_interp = np.linspace(np.min(data), np.max(data), num=npoints)

    kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(data[:, np.newaxis])
    pdf = np.exp(kde.score_samples(x_interp[:, np.newaxis]))
    
    cdf = integrate.cumtrapz(pdf, x=x_interp, initial=0)
    cdf /= cdf[-1]

    if plot_pdf:
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(x_interp, pdf, color='k', label='Estimated PDF')
        ax.hist(
            data,
            bins='auto',
            range=xrange,
            histtype='step',
            density=True,
            color='k',
            alpha=0.3,
            linewidth=2,
            label="Data",
        )
        ax.set_xlim(x_interp[0], x_interp[-1])
        ax.set_ylim(bottom=0)
        ax.grid()
        ax.tick_params(which='both', direction='in', right=True, top=True)
        ax.set_title(f"Estimated PDF of data from KDE: bandwidth = {bandwidth:.2e}")
        ax.legend()

    inv_cdf = interpolate.interp1d(cdf, x_interp)

    return inv_cdf 
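Downstream of the helper above, inverse transform sampling amounts to evaluating the returned inverse CDF at uniform random quantiles. A short usage sketch, assuming inv_cdf was returned by _sample_from_kde:

import numpy as np

u = np.random.uniform(0, 1, size=1000)  # uniform quantiles in [0, 1]
samples = inv_cdf(u)                    # draws distributed like the estimated PDF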
Example 54
Project: mousestyles   Author: berkeley-stat222   File: kde.py    BSD 2-Clause "Simplified" License
def kde(x, x_grid, symmetric_correction=False, cutoff=1):
    """
    Return a numpy.ndarray object of estimated density

    Parameters
    ----------
    x      : numpy.ndarray
        data, as realizations of variable X
    x_grid : numpy.ndarray
        the grid points for the estimated density
    symmetric_correction : boolean
        a method indicator. If False, do common gaussian kernel density
        estimation (kde). If True, do common gaussian kde on data generated
        from x concatenating with its reflection around the cutoff point. Then
        transform the estimated kde back by a factor of 2. Used for e.g. kde
        for nonnegative kernel estimation
    cutoff : float
        the axis of symmetry for symmetric correction
    Returns
    -------
    pdf : numpy.ndarray
        estimated density at the specified grid points x_grid

    Examples
    --------
    >>> kde(x = np.array([2,3,1,0]), x_grid=np.linspace(0, 5, 10))
    array([ 0.17483395,  0.21599529,  0.23685855,  0.24007961,  0.22670763,
        0.19365019,  0.14228937,  0.08552725,  0.04043597,  0.01463953])
    >>> x1 = np.concatenate([norm(-1, 1.).rvs(400), norm(1, 0.3).rvs(100)])
    >>> pdf1 = kde(x=x1, x_grid=np.linspace(0, 5, 100), symmetric_correction
                   =True, cutoff=1)
    array([ 0.26625297,  0.26818492,  0.27105849,  0.27489486,  0.27968752, ...
        0.07764054,  0.07239964,  0.06736559,  0.06254175,  0.05793043])
    """
    # if we want to do a symmetric correction to do kde, we transform
    # the data to be the symmetric format by adding the counterpart
    # obtained by reflecting the data around x=cutoff
    if symmetric_correction:
        x = np.concatenate([x, 2*cutoff - x])

    # Use GridSearchCV to search for the best bandwidth by 5 fold
    # cross-validation max loglikelihood
    grid = GridSearchCV(KernelDensity(),
                        {'bandwidth': np.linspace(0.01, 0.5, 10)},
                        cv=min(5, len(x)))
    grid.fit(x[:, None])
    bandwidth = grid.best_params_['bandwidth']

    # do density estimation
    kde_skl = KernelDensity(bandwidth=bandwidth)
    kde_skl.fit(x[:, np.newaxis])
    log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis])

    # transform the log-density scores back to a density
    if symmetric_correction:
        # transform back to the one-sided density in symmetric correction
        pdf = 2 * np.exp(log_pdf)
    else:
        # vanilla case
        pdf = np.exp(log_pdf)
    return pdf