Python sklearn.metrics.jaccard_similarity_score() Examples

The following are 15 code examples showing how to use sklearn.metrics.jaccard_similarity_score(). The examples are extracted from open source projects; the project, author, file, and license are noted above each one.

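Note that jaccard_similarity_score was deprecated in scikit-learn 0.21 and removed in 0.23 in favor of jaccard_score (Example 4 below exercises the deprecation warning). As a quick orientation, a minimal sketch of the basic call on an older scikit-learn:

import numpy as np
from sklearn.metrics import jaccard_similarity_score  # deprecated in 0.21, removed in 0.23

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])

# On 1-D binary/multiclass input the score equals plain accuracy:
# 3 of the 4 labels match, so this prints 0.75.
print(jaccard_similarity_score(y_true, y_pred))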

Example 1
Project: edge2vec   Author: RoyZhengGao   File: multi_class_classification.py   License: BSD 3-Clause "New" or "Revised" License
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_predict

def multi_class_classification(data_X, data_Y):
    '''
    Calculate multi-class classification and return related evaluation metrics.
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # train the SVM
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example 2
Project: edge2vec   Author: RoyZhengGao   File: link_prediction.py   License: BSD 3-Clause "New" or "Revised" License
from sklearn import metrics

def evaluation_analysis(true_label, predicted):
    '''
    Return all evaluation metrics results.
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example 3
Project: G-Bert   Author: jshang123   File: utils.py   License: MIT License
def metric_report(y_pred, y_true, threshold=0.5):
    y_prob = y_pred.copy()
    y_pred[y_pred > threshold] = 1
    y_pred[y_pred <= threshold] = 0

    acc_container = {}
    ja, prauc, avg_p, avg_r, avg_f1 = multi_label_metric(
        y_true, y_pred, y_prob)
    acc_container['jaccard'] = ja
    acc_container['f1'] = avg_f1
    acc_container['prauc'] = prauc

    # acc_container['jaccard'] = jaccard_similarity_score(y_true, y_pred)
    # acc_container['f1'] = f1(y_true, y_pred)
    # acc_container['auc'] = roc_auc(y_true, y_prob)
    # acc_container['prauc'] = precision_auc(y_true, y_prob)

    for k, v in acc_container.items():
        logger.info('%-10s : %-10.4f' % (k, v))

    return acc_container 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_classification.py   License: MIT License
def test_multilabel_jaccard_similarity_score_deprecation():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    jss = partial(assert_warns, DeprecationWarning, jaccard_similarity_score)
    assert_equal(jss(y1, y2), 0.75)
    assert_equal(jss(y1, y1), 1)
    assert_equal(jss(y2, y2), 1)
    assert_equal(jss(y2, np.logical_not(y2)), 0)
    assert_equal(jss(y1, np.logical_not(y1)), 0)
    assert_equal(jss(y1, np.zeros(y1.shape)), 0)
    assert_equal(jss(y2, np.zeros(y1.shape)), 0) 
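The deprecation exercised above landed in scikit-learn 0.21, and jaccard_similarity_score was removed in 0.23. Its multilabel behavior is reproduced by jaccard_score with average='samples'; a minimal sketch against a current scikit-learn:

import numpy as np
from sklearn.metrics import jaccard_score

y1 = np.array([[0, 1, 1], [1, 0, 1]])
y2 = np.array([[0, 0, 1], [1, 0, 1]])

# Per-sample Jaccard averaged over samples: (1/2 + 2/2) / 2 = 0.75,
# the same value the deprecated function returns above.
assert jaccard_score(y1, y2, average='samples') == 0.75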
Example 5
Project: CIKM-AnalytiCup-2018   Author: zake7749   File: feature_engineering.py   License: Apache License 2.0
def _get_similarity_values(self, q1_csc, q2_csc):
        # cs, md, ed, jsc and minkowski_dis are module-level aliases;
        # a reconstruction of the imports is sketched after this example.
        cosine_sim = []
        manhattan_dis = []
        euclidean_dis = []
        jaccard_dis = []
        minkowski_dists = []

        for i, j in zip(q1_csc, q2_csc):
            sim = cs(i, j)
            cosine_sim.append(sim[0][0])
            sim = md(i, j)
            manhattan_dis.append(sim[0][0])
            sim = ed(i, j)
            euclidean_dis.append(sim[0][0])
            i_ = i.toarray()
            j_ = j.toarray()
            try:
                sim = jsc(i_, j_)
                jaccard_dis.append(sim)
            except Exception:
                # jaccard_similarity_score rejects some inputs; fall back to 0
                jaccard_dis.append(0)

            sim = minkowski_dis.pairwise(i_, j_)
            minkowski_dists.append(sim[0][0])
        return cosine_sim, manhattan_dis, euclidean_dis, jaccard_dis, minkowski_dists
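The aliases cs, md, ed, jsc and minkowski_dis are bound at module level in the project; a plausible reconstruction of those imports (an assumption, not copied from the source file) is:

# Hypothetical reconstruction of the module-level aliases used above:
from sklearn.metrics.pairwise import cosine_similarity as cs
from sklearn.metrics.pairwise import manhattan_distances as md
from sklearn.metrics.pairwise import euclidean_distances as ed
from sklearn.metrics import jaccard_similarity_score as jsc
from sklearn.neighbors import DistanceMetric

minkowski_dis = DistanceMetric.get_metric('minkowski')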
Example 6
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_multioutput.py   License: MIT License
def test_classifier_chain_crossval_fit_and_predict():
    # Fit classifier chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain_cv = ClassifierChain(LogisticRegression(), cv=3)
    classifier_chain_cv.fit(X, Y)

    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred_cv = classifier_chain_cv.predict(X)
    Y_pred = classifier_chain.predict(X)

    assert_equal(Y_pred_cv.shape, Y.shape)
    assert_greater(jaccard_similarity_score(Y, Y_pred_cv), 0.4)

    assert_not_equal(jaccard_similarity_score(Y, Y_pred_cv),
                     jaccard_similarity_score(Y, Y_pred)) 
Example 7
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_multioutput.py   License: MIT License
def test_classifier_chain_vs_independent_models():
    # Verify that an ensemble of classifier chains (each of length
    # N) can achieve a higher Jaccard similarity score than N independent
    # models
    X, Y = generate_multilabel_dataset_with_correlations()
    X_train = X[:600, :]
    X_test = X[600:, :]
    Y_train = Y[:600, :]
    Y_test = Y[600:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression())
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr)) 
Example 8
Project: edafa   Author: andrewekhalel   File: utils.py   License: MIT License
def multi_iou(a, b):
    # Mean IoU over the non-background classes present in `a`
    # (np.unique(a)[1:] skips the background label, which sorts first).
    jk = 0.0
    vals = np.unique(a)[1:]
    for v in vals:
        ac = a.copy()
        ac[ac != v] = 0
        bc = b.copy()
        bc[bc != v] = 0
        jk += iou(ac, bc)
    return jk / len(vals)
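multi_iou depends on an iou helper defined elsewhere in the project; a minimal sketch of a compatible implementation, assuming nonzero pixels count as foreground (hypothetical, not the project's own code):

import numpy as np

def iou(a, b):
    # Intersection over union of two masks, treating nonzero as foreground.
    inter = np.logical_and(a != 0, b != 0).sum()
    union = np.logical_or(a != 0, b != 0).sum()
    return inter / union if union else 0.0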
Example 9
Project: segmentation-unet-maskrcnn   Author: olgaliak   File: model_metrics.py   License: MIT License
def calc_jacc_img_msk(model, img, msk, batch_size, n_classes):
    prd = model.predict(img, batch_size=batch_size)
    # prd.shape and msk.shape are both (16, 2, 256, 256), i.e. (N, classes, H, W)
    avg, trs = [], []

    for i in range(n_classes):
        t_msk = msk[:, i, :, :]  # shape (N, H, W)
        t_prd = prd[:, i, :, :]
        t_msk = t_msk.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])  # shape (N*H, W)
        t_prd = t_prd.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])

        m, b_tr = 0, 0
        for j in range(10):
            tr = j / 10  # sweep binarization thresholds 0.0, 0.1, ..., 0.9
            pred_binary_mask = t_prd > tr

            jk = jaccard_similarity_score(t_msk, pred_binary_mask)
            if jk > m:
                m = jk
                b_tr = tr

        print("i, m, b_tr", i, m, b_tr)
        avg.append(m)
        trs.append(b_tr)

    score = sum(avg) / n_classes
    return score, trs
Example 10
Project: segmentation-unet-maskrcnn   Author: olgaliak   File: scorer.py   License: MIT License
def display_pred(pred_res, true_masks, config, modelName, amt_pred, trs, min_pred_sum, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    trs_str = "trs_none"
    if trs is not None:
        trs_str = '_'.join([str(x) for x in trs])
        trs_str = "trs_" + trs_str

    #print("Saving predictions when np.sum(pred) >", min_pred_sum)
    nothing_saved = True
    for p in range(amt_pred):
        for i in range(config.NUM_CLASSES):
            pred = pred_res[p,:, :, i]
            true_mask = true_masks[p, :, :, i]

            sum_pred = np.sum(pred)
            sum_mask = np.sum(true_mask)
            if sum_pred > min_pred_sum:
                # a stricter variant would also require: sum_mask > min_pred_sum

                jk = jaccard_similarity_score(true_mask, pred)
                #print("Calc jaccard", jk)
                fn = os.path.join(output_folder,"{4}{0}_p{1}_cl{2}_{3}.png".format(modelName, p, i, trs_str, jk))
                #print("Saving  predictions with np.sum {0} to  {1}".format(sum, fn))
                plt.imsave(fn, pred, cmap='hot')

                fn_tr= os.path.join(output_folder,"{4}{0}_p{1}_TRUE_cl{2}_{3}.png".format(modelName, p, i, trs_str, jk))
                plt.imsave(fn_tr, true_mask, cmap='hot')

                nothing_saved = False

    if nothing_saved:
        print("No prediction satisfied sum_pred > min_pred_sum; nothing was saved. min_pred_sum:", min_pred_sum)
Example 11
Project: nlp_research   Author: zhufz   File: similarity.py   License: MIT License
def similarity(self, query, type):
        assert self.corpus is not None, "self.corpus can't be None"
        ret = []
        if type == 'cosine':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = cosine_similarity(item, query)
                ret.append(sim[0][0])
        elif type == 'manhattan':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = manhattan_distances(item, query)
                ret.append(sim[0][0])
        elif type == 'euclidean':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = euclidean_distances(item, query)
                ret.append(sim[0][0])
        #elif type == 'jaccard':
        #    #query = query.split()
        #    query = self.get_vector(query)
        #    for item in self.corpus_vec:
        #        pdb.set_trace()
        #        sim = jaccard_similarity_score(item, query)
        #        ret.append(sim)
        elif type == 'bm25':
            query = query.split()
            ret = self.bm25_model.get_scores(query)
        else:
            raise ValueError('similarity type error: %s' % type)
        return ret 
Example 12
Project: policy_diffusion   Author: dssg   File: score_alignments.py   License: MIT License
def jaccard_coefficient(left, right):
    jaccard_scores = jaccard_similarity_score(left, right)
    return jaccard_scores 
Example 13
Project: DRFNS   Author: PeterJackNaylor   File: utils.py   License: MIT License
def ComputeMetrics(prob, batch_labels, p1, p2, rgb=None, save_path=None, ind=0):
    """
    Computes all metrics between a probability map and the corresponding label.
    If an RGB image is also given, several extra metadata images are saved.
    """
    GT = label(batch_labels.copy())
    PRED = PostProcess(prob, p1, p2)
    # PRED = label((prob > 0.5).astype('uint8'))
    lbl = GT.copy()
    pred = PRED.copy()
    aji = AJI_fast(lbl, pred)
    lbl[lbl > 0] = 1
    pred[pred > 0] = 1
    l, p = lbl.flatten(), pred.flatten()
    acc = accuracy_score(l, p)
    roc = roc_auc_score(l, p)
    jac = jaccard_similarity_score(l, p)  # on 1-D binary input this equals accuracy_score
    f1 = f1_score(l, p)
    recall = recall_score(l, p)
    precision = precision_score(l, p)
    if rgb is not None:
        xval_n = join(save_path, "xval_{}.png").format(ind)
        yval_n = join(save_path, "yval_{}.png").format(ind)
        prob_n = join(save_path, "prob_{}.png").format(ind)
        pred_n = join(save_path, "pred_{}.png").format(ind)
        c_gt_n = join(save_path, "C_gt_{}.png").format(ind)
        c_pr_n = join(save_path, "C_pr_{}.png").format(ind)

        imsave(xval_n, rgb)
        imsave(yval_n, color_bin(GT))
        imsave(prob_n, prob)
        imsave(pred_n, color_bin(PRED))
        imsave(c_gt_n, add_contours(rgb, GT))
        imsave(c_pr_n, add_contours(rgb, PRED))

    return acc, roc, jac, recall, precision, f1, aji 
Example 14
Project: pandas-ml   Author: pandas-ml   File: test_metrics.py   License: BSD 3-Clause "New" or "Revised" License
def test_jaccard_similarity_score(self):
        result = self.df.metrics.jaccard_similarity_score()
        expected = metrics.jaccard_similarity_score(self.target, self.pred)
        self.assertEqual(result, expected)

        result = self.df.metrics.jaccard_similarity_score(normalize=False)
        expected = metrics.jaccard_similarity_score(self.target, self.pred, normalize=False)
        self.assertEqual(result, expected) 
Example 15
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_classification.py   License: MIT License
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)