Python scipy.spatial.distance.hamming() Examples

The following are 14 code examples of scipy.spatial.distance.hamming(). You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the module scipy.spatial.distance, or try the search function.
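For orientation before the examples: scipy.spatial.distance.hamming() returns the proportion of positions at which two equal-length 1-D arrays disagree, not the raw count of mismatches. A minimal sketch (the arrays are made up):

import numpy as np
from scipy.spatial.distance import hamming

u = np.array([1, 0, 1, 1])
v = np.array([1, 1, 1, 0])

# Two of the four positions differ, so the normalized distance is 0.5.
print(hamming(u, v))           # 0.5
# Multiplying by the length recovers the raw count of differing positions.
print(hamming(u, v) * len(u))  # 2.0

Several of the examples below rely on exactly this multiply-by-length trick to turn the normalized distance back into an error count.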
Example #1
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License (7 votes)
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
    assert_warns_message(DeprecationWarning,
                         "The labels parameter is unused. It was"
                         " deprecated in version 0.21 and"
                         " will be removed in version 0.23",
                         hamming_loss, y1, y2, labels=[0, 1]) 
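As the sp_hamming assertion above shows, sklearn's hamming_loss and scipy's hamming coincide on 1-D label vectors; a quick standalone sketch of that equivalence (the labels are made up):

import numpy as np
from scipy.spatial.distance import hamming as sp_hamming
from sklearn.metrics import hamming_loss

y_true = np.array([0, 1, 1])
y_pred = np.array([0, 0, 1])

# Both report the fraction of mismatched labels: 1/3 here.
assert hamming_loss(y_true, y_pred) == sp_hamming(y_true, y_pred)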
Example #2
Source File: cost_sensitive_reference_pair_encoding.py    From libact with BSD 2-Clause "Simplified" License (6 votes)
def make_query(self):
        dataset = self.dataset

        unlabeled_entry_ids, X_pool = dataset.get_unlabeled_entries()
        X_pool = np.asarray(X_pool)

        self.csrpe_.train(dataset)
        self.model_.train(dataset)

        predY = self.model_.predict(X_pool)
        Z = self.csrpe_.predicted_code(X_pool)
        predZ = self.csrpe_.encode(predY)

        dist = paired_distances(Z, predZ, metric=hamming) # z1 z2
        dist2 = self.csrpe_.predict_dist(X_pool) # z1 zt
        #dist3 = self.csrpe.distance(predZ) # z2 zt

        dist = dist + dist2
        #dist = dist + dist3

        ask_id = self.random_state_.choice(
            np.where(np.isclose(dist, np.max(dist)))[0])

        return unlabeled_entry_ids[ask_id] 
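paired_distances compares Z and predZ row by row, applying hamming to each pair; a minimal sketch of that call with stand-in binary codes:

import numpy as np
from scipy.spatial.distance import hamming
from sklearn.metrics.pairwise import paired_distances

Z = np.array([[0, 1, 1], [1, 0, 1]])
predZ = np.array([[0, 0, 1], [1, 0, 1]])

# One distance per row pair: [1/3, 0.]
print(paired_distances(Z, predZ, metric=hamming))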
Example #3
Source File: classification.py    From brainiak with Apache License 2.0 (6 votes)
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is already included, so score is called with None
    print(clf.score(None, test_labels)) 
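The idiom hamming(predict, test_labels) * num_epochs_per_subj, which recurs throughout these brainiak examples, converts scipy's normalized distance back into a count of misclassified epochs. A small sketch with made-up labels:

import numpy as np
from scipy.spatial.distance import hamming

predict = np.array([0, 1, 1, 0])
test_labels = np.array([0, 1, 0, 0])

n = len(test_labels)
incorrect = hamming(predict, test_labels) * n  # 1.0 misclassified epoch
print((n - incorrect) / n)                     # accuracy: 0.75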
Example #4
Source File: classification.py    From brainiak with Apache License 2.0 (6 votes)
def example_of_correlating_two_components(raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    # correlate two components of the data when computing the kernel matrix
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data[0:num_training_samples], raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when correlating two components, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # here the test data X is passed to score explicitly
    print(clf.score(X, test_labels)) 
Example #5
Source File: classification.py    From brainiak with Apache License 2.0 (6 votes)
def example_of_correlating_two_components_aggregating_sim_matrix(raw_data, raw_data2, labels,
                                                                 num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, num_processed_voxels=1000, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj * (num_subjects - 1)
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portions, the test data is already included, so score is called with None
    print(clf.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy 
Example #6
Source File: test_libdist.py    From enspara with GNU General Public License v3.0 (6 votes)
def test_hamming_distance():

    dtypes = ['|S1']
    for elem_size in ['8', '16', '32', '64']:
        for int_type in ['int', 'uint']:
            dtypes.append(int_type + elem_size)

    for dtype in dtypes:
        X = np.array([[1, 3, 8],
                      [3, 1, 8],
                      [1, 1, 7]]).astype(dtype)
        y = np.array([1, 2, 3]).astype(dtype)

        d_expected = np.zeros((len(X)))
        for i in range(len(X)):
            d_expected[i] = scipy_hamming(X[i], y)

        d_enspara = libdist.hamming(X, y)

        assert_array_equal(d_expected, d_enspara) 
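The per-row loop over scipy_hamming above can also be written as a single call to scipy's cdist, which computes one-vector-to-many-rows distances in bulk; a sketch of the equivalence (using the same stand-in arrays as the test, without the dtype sweep):

import numpy as np
from scipy.spatial.distance import cdist, hamming

X = np.array([[1, 3, 8],
              [3, 1, 8],
              [1, 1, 7]])
y = np.array([1, 2, 3])

looped = np.array([hamming(row, y) for row in X])
vectorized = cdist(X, y[None, :], metric='hamming').ravel()
assert np.allclose(looped, vectorized)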
Example #7
Source File: test_classification.py    From twitter-stock-recommendation with MIT License (6 votes)
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
    assert_warns(DeprecationWarning, hamming_loss, y1, y2, classes=[0, 1]) 
Example #8
Source File: plotting.py    From kvae with MIT License (5 votes)
def plot_trajectory_uncertainty(true, gen, filter, smooth, filename):
    sequences, timesteps, h, w = true.shape

    errors = dict(Generated=list(), Filtered=list(), Smoothed=list())
    for label, var in zip(('Generated', 'Filtered', 'Smoothed'), (gen, filter, smooth)):
        for step in range(timesteps):
            errors[label].append(hamming(true[:, step].ravel() > 0.5, var[:, step].ravel() > 0.5))

        plt.plot(np.linspace(1, timesteps, num=timesteps).astype(int), errors[label], linewidth=3, ms=20, label=label)
    plt.xlabel('Steps', fontsize=20)
    plt.ylabel('Hamming distance', fontsize=20)
    plt.legend(fontsize=20)
    plt.savefig(filename)
    plt.close() 
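Note how the example binarizes the float-valued frames with > 0.5 before calling hamming, so the plotted quantity is the fraction of mismatched binary pixels per step. A minimal sketch of that thresholding step on made-up pixel values:

import numpy as np
from scipy.spatial.distance import hamming

true_frame = np.array([0.9, 0.1, 0.8, 0.2])
gen_frame = np.array([0.7, 0.6, 0.9, 0.1])

# Binarize, then compare: one of four pixels disagrees -> 0.25
print(hamming(true_frame > 0.5, gen_frame > 0.5))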
Example #9
Source File: classification.py    From brainiak with Apache License 2.0 (5 votes)
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects, num_epochs_per_subj):
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (i, num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
             (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels)) 
Example #10
Source File: utils.py    From teneto with GNU General Public License v3.0 (5 votes)
def get_distance_function(requested_metric):
    """
    This function returns a specified distance function.

    Parameters
    ----------

    requested_metric: str
        Distance function. Can be any of the distance functions documented at https://docs.scipy.org/doc/scipy/reference/spatial.distance.html.

    Returns
    -------

    requested_metric : distance function

    """
    distance_options = {
        'braycurtis': distance.braycurtis,
        'canberra': distance.canberra,
        'chebyshev': distance.chebyshev,
        'cityblock': distance.cityblock,
        'correlation': distance.correlation,
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'sqeuclidean': distance.sqeuclidean,
        'dice': distance.dice,
        'hamming': distance.hamming,
        'jaccard': distance.jaccard,
        'kulsinski': distance.kulsinski,
        'matching': distance.matching,
        'rogerstanimoto': distance.rogerstanimoto,
        'russellrao': distance.russellrao,
        'sokalmichener': distance.sokalmichener,
        'sokalsneath': distance.sokalsneath,
        'yule': distance.yule,
    }
    if requested_metric in distance_options:
        return distance_options[requested_metric]
    else:
        raise ValueError('Distance function cannot be found.') 
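A short usage sketch of the helper above:

dist = get_distance_function('hamming')
print(dist([1, 0, 1], [1, 1, 1]))  # 1/3: one of three positions differs

try:
    get_distance_function('not-a-metric')
except ValueError as err:
    print(err)  # Distance function cannot be found.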
Example #11
Source File: utils.py    From teneto with GNU General Public License v3.0 (5 votes)
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Checks distance_func_name. If it is specified as 'default', a default
    distance function is selected based on the type of the network.

    Parameters
    ----------

    distance_func_name : str
        distance function name.

    netinfo : dict
        the output of utils.process_input

    Returns
    -------

    distance_func_name : str
        distance function name.
    """
    if distance_func_name == 'default' and netinfo['nettype'][0] == 'b':
        print('Default distance function specified. As network is binary, using Hamming')
        distance_func_name = 'hamming'
    elif distance_func_name == 'default' and netinfo['nettype'][0] == 'w':
        distance_func_name = 'euclidean'
        print(
            'Default distance function specified. '
            'As network is weighted, using Euclidean')

    return distance_func_name 
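A usage sketch, assuming netinfo dicts shaped like the output of utils.process_input (the 'bu' and 'wd' nettype codes are stand-ins for a binary and a weighted network):

# Binary network -> falls back to 'hamming'; weighted -> 'euclidean'.
print(check_distance_funciton_input('default', {'nettype': 'bu'}))  # hamming
print(check_distance_funciton_input('default', {'nettype': 'wd'}))  # euclidean
# A non-default name passes through unchanged.
print(check_distance_funciton_input('cosine', {'nettype': 'wd'}))   # cosine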
Example #12
Source File: taar_similarity.py    From telemetry-airflow with Mozilla Public License 2.0 (5 votes)
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This really should be implemented in taar.similarity_recommender
    and imported here for consistency.
    """

    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, get the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [
        float(safe_get(k, x, 0)) for k in CONTINUOUS_FEATURES
    ]
    y_continuous_features = [
        float(safe_get(k, y, 0)) for k in CONTINUOUS_FEATURES
    ]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = distance.hamming(x_categorical_features, y_categorical_features)
    j_c = distance.canberra(x_continuous_features, y_continuous_features)

    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both distance functions return a NumPy type, we need to
    # call the |item| method to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item() 
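hamming works on the categorical features here because scipy compares the vectors element-wise for equality, so string-valued entries are fine. A minimal sketch of the two distance calls, with made-up feature values standing in for CATEGORICAL_FEATURES and CONTINUOUS_FEATURES:

from scipy.spatial.distance import canberra, hamming

x_cat = ['release', 'en-US', 'Windows']
y_cat = ['release', 'de-DE', 'Windows']
x_cont = [10.0, 3.0]
y_cont = [12.0, 3.0]

j_d = hamming(x_cat, y_cat)     # 1/3: one of three fields differs
j_c = canberra(x_cont, y_cont)  # ~0.0909
print(abs((j_c + 0.001) * j_d))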
Example #13
Source File: taar_similarity.py    From python_mozetl with MIT License (5 votes)
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This really should be implemented in taar.similarity_recommender
    and imported here for consistency.
    """

    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, get the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = distance.hamming(x_categorical_features, y_categorical_features)
    j_c = distance.canberra(x_continuous_features, y_continuous_features)

    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both distance functions return a NumPy type, we need to
    # call the |item| method to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item() 
Example #14
Source File: ripple_carry_adder.py    From forest-benchmarking with Apache License 2.0 (5 votes)
def get_error_hamming_distributions_from_results(results: Sequence[Sequence[Sequence[int]]]) \
        -> Sequence[Sequence[float]]:
    """
    Get the distribution of the hamming weight of the error vector (number of bits flipped
    between output and expected answer) for each possible pair of two n_bit summands using
    results output by get_n_bit_adder_results

    :param results: a list of results output from a call to get_n_bit_adder_results
    :return: the relative frequency of observing each hamming weight, 0 to n_bits+1, for the error
        that occurred when adding each pair of two n_bit summands
    """
    num_shots = len(results[0])
    n_bits = len(results[0][0]) - 1

    hamming_wt_distrs = []
    # loop over all binary strings of length 2 * n_bits (each encodes a pair of summands)
    for result, bits in zip(results, all_bitstrings(2 * n_bits)):
        # Input nums are written from (MSB .... LSB) = (a_n, ..., a_1, a_0)
        num_a = bit_array_to_int(bits[:n_bits])
        num_b = bit_array_to_int(bits[n_bits:])

        # add the numbers
        ans = num_a + num_b
        ans_bits = int_to_bit_array(ans, n_bits + 1)

        # record the fraction of shots that resulted in an error of the given weight
        hamming_wt_distr = [0. for _ in range(len(ans_bits) + 1)]
        for shot in result:
            # multiply the normalized Hamming distance by the output length to recover the raw bit-flip count
            wt = len(ans_bits) * hamming(ans_bits, shot)
            hamming_wt_distr[int(wt)] += 1. / num_shots

        hamming_wt_distrs.append(hamming_wt_distr)

    return hamming_wt_distrs
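The core of the loop above is again the multiply-by-length trick: len(ans_bits) * hamming(ans_bits, shot) recovers the integer number of flipped bits, which then indexes the weight histogram. A small sketch of one histogram update with made-up shots:

import numpy as np
from scipy.spatial.distance import hamming

ans_bits = np.array([1, 0, 1])  # expected adder output
shots = [np.array([1, 0, 1]), np.array([0, 0, 1])]  # two measured shots

distr = [0.0] * (len(ans_bits) + 1)
for shot in shots:
    wt = len(ans_bits) * hamming(ans_bits, shot)  # raw bit-flip count
    distr[int(wt)] += 1.0 / len(shots)
print(distr)  # [0.5, 0.5, 0.0, 0.0]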