Python sklearn.cluster.estimate_bandwidth() Examples

The following are 11 code examples of sklearn.cluster.estimate_bandwidth(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
Example #1
Source File: clustering_meanShift.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0 6 votes vote down vote up
def findClusters_meanShift(data):
    '''
        Run Mean Shift clustering on the data and return
        the result of fitting the model
    '''
    # estimate the bandwidth to use for the Mean Shift model
    estimated_bandwidth = cl.estimate_bandwidth(
        data, quantile=0.25, n_samples=500)

    # build the model with the estimated bandwidth and fit it
    return cl.MeanShift(
        bandwidth=estimated_bandwidth, bin_seeding=True
    ).fit(data)

# the file name of the dataset 
Example #2
Source File: utils.py    From sparseprop with MIT License 6 votes vote down vote up
def get_typical_durations(raw_durations, bandwidth_percentile=0.05,
                       min_intersection=0.5, miss_covered=0.1):
    """Return typical durations in a dataset.

    The durations are clustered with Mean Shift; the cluster centers, cast to
    ``int`` and sorted, are the typical durations.

    Parameters
    ----------
    raw_durations : numpy array
        Durations to cluster; reshaped internally to a single column.
    bandwidth_percentile : float
        Forwarded as ``quantile`` to ``estimate_bandwidth``.
    min_intersection : float
        A duration counts as covered by a typical duration when their ratio
        lies within ``[min_intersection, 1 / min_intersection]``.
    miss_covered : float
        Largest tolerated fraction of durations not covered by any typical
        duration.

    Returns
    -------
    numpy array
        Sorted integer cluster centers.

    Raises
    ------
    AssertionError
        If the covered fraction of durations is below ``1 - miss_covered``.
    """
    dur = raw_durations.reshape(raw_durations.shape[0], 1)
    bandwidth = estimate_bandwidth(dur, quantile=bandwidth_percentile)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=False)
    # `dur` is already a column vector, so it can be fitted as is.
    ms.fit(dur)
    tw = np.sort(np.array(
        ms.cluster_centers_.reshape(ms.cluster_centers_.shape[0]), dtype=int))
    # Guarantee a min intersection in the output durations.
    p = np.zeros((dur.shape[0], tw.shape[0]))
    for idx in range(tw.shape[0]):
        # Ratio of every duration to the idx-th typical duration.
        p[:, idx] = (dur / tw[idx]).reshape(p[:, idx].shape[0])
    ll = (p >= min_intersection) & (p <= 1.0 / min_intersection)
    covered_fraction = (
        (ll.sum(axis=1) > 0).sum() / float(raw_durations.shape[0]))
    if covered_fraction < (1.0 - miss_covered):
        # Raise explicitly instead of `assert False`: assert statements are
        # stripped when Python runs with -O, which would silently skip this
        # validation.
        raise AssertionError(
            "Condition of minimum intersection not satisfied")
    return tw
Example #3
Source File: test_mean_shift.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_estimate_bandwidth_with_sparse_matrix():
    # estimate_bandwidth is expected to reject sparse input with a TypeError
    expected_message = "A sparse matrix was passed, but dense data is required."
    sparse_input = sparse.lil_matrix((1000, 1000))
    assert_raise_message(
        TypeError, expected_message, estimate_bandwidth, sparse_input, 200)
Example #4
Source File: shifted_delta_cepstra.py    From hunspeech with MIT License 5 votes vote down vote up
def loop_estimate_bandwidth():
        # Log the estimated bandwidth for prefixes of the data whose length
        # doubles each round (4, 8, 16, ...), stopping before the full length.
        # NOTE(review): `self` is not a parameter here, so it must come from
        # an enclosing scope that is not visible in this excerpt -- confirm.
        prefix_len = 4
        while prefix_len < self.sdc_all_speech.shape[0]:
            bandwidth = estimate_bandwidth(self.sdc_all_speech[:prefix_len])
            logging.info((prefix_len, bandwidth))
            prefix_len *= 2
Example #5
Source File: test_mean_shift.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_estimate_bandwidth():
    # The bandwidth estimated from 200 samples of X must fall in [0.9, 1.5]
    lower, upper = 0.9, 1.5
    estimated = estimate_bandwidth(X, n_samples=200)
    assert lower <= estimated <= upper
Example #6
Source File: test_mean_shift.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_estimate_bandwidth_1sample():
    # With n_samples=1 and quantile<1, n_neighbors is set to 1; the
    # estimated bandwidth is then expected to be exactly zero.
    single_sample_bandwidth = estimate_bandwidth(
        X, quantile=0.3, n_samples=1)
    assert single_sample_bandwidth == 0.
Example #7
Source File: test_mean_shift.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_estimate_bandwidth_with_sparse_matrix():
    # Passing a sparse matrix to estimate_bandwidth must raise a TypeError
    # carrying the message below
    expected_message = "A sparse matrix was passed, but dense data is required."
    sparse_input = sparse.lil_matrix((1000, 1000))
    assert_raise_message(
        TypeError, expected_message, estimate_bandwidth, sparse_input, 200)
Example #8
Source File: ml.py    From forex_algotrading with MIT License 5 votes vote down vote up
def main(filename):
    """Cluster 'Sell' prices with Mean Shift and export the results as JSON.

    Reads the CSV at ``filename`` (columns are named Date_Time, Buy and Sell,
    with Date_Time parsed as the index), then writes two files to the current
    directory:

    * ``ticks.json`` -- 24-hour OHLC bars of the 'Sell' column
    * ``ml_results.json`` -- a flat list holding, for every cluster label,
      the minimum followed by the maximum 'Sell' value in that cluster
    """
    # read csv files with daily data per tick
    # NOTE(review): newer pandas releases flag `date_parser` as deprecated --
    # confirm against the pandas version this project pins.
    df = pandas.read_csv(filename, parse_dates=[0], index_col=0, names=['Date_Time', 'Buy', 'Sell'],
                         date_parser=lambda x: pandas.to_datetime(x, format="%d/%m/%y %H:%M:%S"))

    # group by day and drop NA values (usually weekends)
    grouped_data = df.dropna()
    ticks_data = grouped_data['Sell'].resample('24H').ohlc()

    # use 'ask'
    # `DataFrame.as_matrix` is gone from current pandas; selecting the column
    # as a one-column frame and taking `.values` gives the same (n, 1) array.
    sell_data = grouped_data[['Sell']].values

    # calculate bandwidth (experiment with quantile and samples)
    bandwidth = estimate_bandwidth(sell_data, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    # fit the data
    ms.fit(sell_data)

    ml_results = []
    for k in range(len(np.unique(ms.labels_))):
        my_members = ms.labels_ == k
        values = sell_data[my_members, 0]

        # find the edges; plain floats keep the list JSON-serializable
        ml_results.append(float(values.min()))
        ml_results.append(float(values.max()))

    # export the data for the visualizations
    ticks_data.to_json('ticks.json', date_format='iso', orient='index')

    # export ml support resistance
    with open('ml_results.json', 'w') as f:
        json.dump(ml_results, f)

    print("Done. Goto 0.0.0.0:8000/chart.html")
Example #9
Source File: test_cluster.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_estimate_bandwidth(self):
        # The ModelFrame accessor must return the same bandwidth as calling
        # cluster.estimate_bandwidth on the raw iris data.
        iris_bunch = datasets.load_iris()
        frame = pdml.ModelFrame(iris_bunch)

        via_accessor = frame.cluster.estimate_bandwidth(
            random_state=self.random_state)
        via_module = cluster.estimate_bandwidth(
            iris_bunch.data, random_state=self.random_state)
        self.assertEqual(via_accessor, via_module)
Example #10
Source File: mechanical.py    From CO2MPAS-TA with European Union Public License 1.1 5 votes vote down vote up
def identify_velocity_speed_ratios_v3(
        engine_speeds_out, velocities, idle_engine_speed, stop_velocity):
    """
    Identifies the velocity speed ratios of the gear box [km/(h*RPM)] by
    clustering the velocity / engine-speed ratios with Mean Shift.

    :param engine_speeds_out:
        Engine speed [RPM].
    :type engine_speeds_out: numpy.array

    :param velocities:
        Velocity vector [km/h].
    :type velocities: numpy.array

    :param idle_engine_speed:
        Engine speed idle median and std [RPM].
    :type idle_engine_speed: (float, float)

    :param stop_velocity:
        Maximum velocity to consider the vehicle stopped [km/h].
    :type stop_velocity: float

    :return:
        Constant velocity speed ratios of the gear box [km/(h*RPM)], keyed
        from 1 in ascending order of ratio, plus key 0 mapped to 0.0.
    :rtype: dict
    """
    from sklearn.cluster import MeanShift, estimate_bandwidth

    # Keep only samples where the engine speed exceeds idle median + std
    # and the velocity exceeds the stop threshold.
    min_engine_speed = idle_engine_speed[0] + idle_engine_speed[1]
    running = engine_speeds_out > min_engine_speed
    moving = velocities > stop_velocity
    keep = running & moving

    # One ratio per retained sample, as a single-column 2D array.
    ratios = (velocities[keep] / engine_speeds_out[keep])[:, None]

    model = MeanShift(
        bandwidth=estimate_bandwidth(ratios, quantile=0.2),
        bin_seeding=True,
    )
    model.fit(ratios)

    # Number the sorted cluster centers starting from 1.
    centers = sorted(model.cluster_centers_[:, 0])
    vsr = dict(enumerate(centers, start=1))

    vsr[0] = 0.0

    return vsr
Example #11
Source File: test_mean_shift.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_estimate_bandwidth():
    # The bandwidth estimated from 200 samples of X must fall in [0.9, 1.5]
    estimated = estimate_bandwidth(X, n_samples=200)
    within_expected_range = 0.9 <= estimated <= 1.5
    assert_true(within_expected_range)