Python sklearn.metrics.jaccard_similarity_score() Examples

The following are 15 code examples of sklearn.metrics.jaccard_similarity_score(), drawn from open-source projects. Note that jaccard_similarity_score was deprecated in scikit-learn 0.21 in favor of jaccard_score and removed in 0.23, so these snippets target older scikit-learn releases.
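As a quick orientation before the examples, here is a minimal sketch of what the function computes, assuming a pre-0.23 scikit-learn where it still exists:

import numpy as np
from sklearn.metrics import jaccard_similarity_score  # removed in scikit-learn 0.23

# Multilabel indicator matrices: each row is one sample's label set.
y_true = np.array([[0, 1, 1], [1, 0, 1]])
y_pred = np.array([[0, 0, 1], [1, 0, 1]])

# Per-sample Jaccard is [1/2, 2/2]; the score is their mean.
print(jaccard_similarity_score(y_true, y_pred))  # 0.75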
Example #1
Source File: multi_class_classification.py    From edge2vec with BSD 3-Clause "New" or "Revised" License
def multi_class_classification(data_X, data_Y):
    '''
    Calculate multi-class classification and print related evaluation metrics.
    '''
    # Assumes: from sklearn import svm, metrics
    #          from sklearn.model_selection import cross_val_predict, train_test_split
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # fit the SVM on the full data
    # print(svc.coef_)  # linear-kernel weight matrix, if needed
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))      # expects probabilities
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))  # binary scores only
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example #2
Source File: link_prediction.py    From edge2vec with BSD 3-Clause "New" or "Revised" License
def evaluation_analysis(true_label, predicted):
    '''
    Print all metrics results for a binary link-prediction run.
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example #3
Source File: utils.py    From G-Bert with MIT License
def metric_report(y_pred, y_true, threshold=0.5):
    y_prob = y_pred.copy()          # keep the raw probabilities for PR-AUC
    y_pred[y_pred > threshold] = 1  # binarize the predictions in place
    y_pred[y_pred <= threshold] = 0

    acc_container = {}
    ja, prauc, avg_p, avg_r, avg_f1 = multi_label_metric(
        y_true, y_pred, y_prob)
    acc_container['jaccard'] = ja
    acc_container['f1'] = avg_f1
    acc_container['prauc'] = prauc

    # acc_container['jaccard'] = jaccard_similarity_score(y_true, y_pred)
    # acc_container['f1'] = f1(y_true, y_pred)
    # acc_container['auc'] = roc_auc(y_true, y_prob)
    # acc_container['prauc'] = precision_auc(y_true, y_prob)

    for k, v in acc_container.items():
        logger.info('%-10s : %-10.4f' % (k, v))

    return acc_container 
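multi_label_metric is a project-local helper not shown here; a minimal sketch of how its Jaccard component could be computed with NumPy (an assumption about the interface, not G-Bert's actual code):

import numpy as np

def mean_sample_jaccard(y_true, y_pred):
    """Mean per-sample Jaccard over multilabel indicator matrices."""
    inter = np.logical_and(y_true, y_pred).sum(axis=1)
    union = np.logical_or(y_true, y_pred).sum(axis=1)
    # Count empty-union rows (no true and no predicted labels) as 1.0,
    # matching jaccard_similarity_score's convention.
    return np.where(union == 0, 1.0, inter / np.maximum(union, 1)).mean()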
Example #4
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_multilabel_jaccard_similarity_score_deprecation():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    jss = partial(assert_warns, DeprecationWarning, jaccard_similarity_score)
    assert_equal(jss(y1, y2), 0.75)
    assert_equal(jss(y1, y1), 1)
    assert_equal(jss(y2, y2), 1)
    assert_equal(jss(y2, np.logical_not(y2)), 0)
    assert_equal(jss(y1, np.logical_not(y1)), 0)
    assert_equal(jss(y1, np.zeros(y1.shape)), 0)
    assert_equal(jss(y2, np.zeros(y1.shape)), 0) 
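In scikit-learn 0.21 and later, the undeprecated spelling of the same quantity is jaccard_score with average='samples'; reusing y1 and y2 from the test above:

from sklearn.metrics import jaccard_score

# Per-sample Jaccard is [1/2, 2/2], so the samples-averaged score is 0.75.
assert jaccard_score(y1, y2, average='samples') == 0.75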
Example #5
Source File: feature_engineering.py    From CIKM-AnalytiCup-2018 with Apache License 2.0
def _get_similarity_values(self, q1_csc, q2_csc):
        cosine_sim = []
        manhattan_dis = []
        euclidean_dis = []
        jaccard_dis = []
        minkowski_dists = []

        for i, j in zip(q1_csc, q2_csc):
            sim = cs(i, j)
            cosine_sim.append(sim[0][0])
            sim = md(i, j)
            manhattan_dis.append(sim[0][0])
            sim = ed(i, j)
            euclidean_dis.append(sim[0][0])
            i_ = i.toarray()
            j_ = j.toarray()
            try:
                sim = jsc(i_, j_)
                jaccard_dis.append(sim)
            except Exception:
                # Non-binary rows make the Jaccard score undefined here.
                jaccard_dis.append(0)

            sim = minkowski_dis.pairwise(i_, j_)
            minkowski_dists.append(sim[0][0])
        return cosine_sim, manhattan_dis, euclidean_dis, jaccard_dis, minkowski_dists
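The short aliases here come from the original file's imports; a plausible reconstruction (the exact aliases are an assumption):

from sklearn.metrics.pairwise import cosine_similarity as cs
from sklearn.metrics.pairwise import manhattan_distances as md
from sklearn.metrics.pairwise import euclidean_distances as ed
from sklearn.metrics import jaccard_similarity_score as jsc
from sklearn.neighbors import DistanceMetric

minkowski_dis = DistanceMetric.get_metric('minkowski')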
Example #6
Source File: test_multioutput.py    From twitter-stock-recommendation with MIT License
def test_classifier_chain_crossval_fit_and_predict():
    # Fit classifier chain with cross_val_predict and verify predict
    # performance
    X, Y = generate_multilabel_dataset_with_correlations()
    classifier_chain_cv = ClassifierChain(LogisticRegression(), cv=3)
    classifier_chain_cv.fit(X, Y)

    classifier_chain = ClassifierChain(LogisticRegression())
    classifier_chain.fit(X, Y)

    Y_pred_cv = classifier_chain_cv.predict(X)
    Y_pred = classifier_chain.predict(X)

    assert_equal(Y_pred_cv.shape, Y.shape)
    assert_greater(jaccard_similarity_score(Y, Y_pred_cv), 0.4)

    assert_not_equal(jaccard_similarity_score(Y, Y_pred_cv),
                     jaccard_similarity_score(Y, Y_pred)) 
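generate_multilabel_dataset_with_correlations is a test-suite helper used by this and the next example; a sketch consistent with scikit-learn's version, which binary-encodes a 16-class problem so that the label bits are correlated:

import numpy as np
from sklearn.datasets import make_classification

def generate_multilabel_dataset_with_correlations():
    # Encode each of 16 classes as 4 correlated label bits.
    X, y = make_classification(n_samples=1000, n_features=100,
                               n_classes=16, n_informative=10,
                               n_clusters_per_class=1)
    Y = np.array([[int(bit) for bit in format(v, '04b')] for v in y])
    return X, Y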
Example #7
Source File: test_multioutput.py    From twitter-stock-recommendation with MIT License
def test_classifier_chain_vs_independent_models():
    # Verify that a classifier chain can achieve a higher Jaccard
    # similarity score than N independent one-vs-rest models when the
    # labels are correlated
    X, Y = generate_multilabel_dataset_with_correlations()
    X_train = X[:600, :]
    X_test = X[600:, :]
    Y_train = Y[:600, :]
    Y_test = Y[600:, :]

    ovr = OneVsRestClassifier(LogisticRegression())
    ovr.fit(X_train, Y_train)
    Y_pred_ovr = ovr.predict(X_test)

    chain = ClassifierChain(LogisticRegression())
    chain.fit(X_train, Y_train)
    Y_pred_chain = chain.predict(X_test)

    assert_greater(jaccard_similarity_score(Y_test, Y_pred_chain),
                   jaccard_similarity_score(Y_test, Y_pred_ovr)) 
Example #8
Source File: utils.py    From edafa with MIT License
def multi_iou(a, b):
    """Mean per-class IoU between two label masks, ignoring background (0)."""
    jk = 0.0
    vals = np.unique(a)[1:]  # class ids present in `a`, skipping background 0
    for v in vals:
        ac = a.copy()
        ac[ac != v] = 0  # isolate class v in each mask
        bc = b.copy()
        bc[bc != v] = 0
        jk += iou(ac, bc)
    return jk / len(vals)
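iou is a sibling helper in the same file; a minimal compatible implementation might look like this (an assumption, not edafa's actual code):

import numpy as np

def iou(a, b):
    """Jaccard index of the nonzero regions of two masks."""
    inter = np.logical_and(a, b).sum()
    union = np.logical_or(a, b).sum()
    return inter / union if union else 1.0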
Example #9
Source File: model_metrics.py    From segmentation-unet-maskrcnn with MIT License
def calc_jacc_img_msk(model, img, msk, batch_size, n_classes):
    prd = model.predict(img, batch_size=batch_size)
    # prd.shape and msk.shape are (N, n_classes, H, W), e.g. (16, 2, 256, 256)
    avg, trs = [], []

    for i in range(n_classes):
        t_msk = msk[:, i, :, :]  # class-i masks, shape (N, H, W)
        t_prd = prd[:, i, :, :]
        t_msk = t_msk.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])  # (N*H, W)
        t_prd = t_prd.reshape(msk.shape[0] * msk.shape[2], msk.shape[3])

        m, b_tr = 0, 0
        for j in range(10):
            tr = j / 10  # candidate binarization thresholds 0.0 .. 0.9
            pred_binary_mask = t_prd > tr

            jk = jaccard_similarity_score(t_msk, pred_binary_mask)
            if jk > m:
                m = jk
                b_tr = tr

        print("i, m, b_tr", i, m, b_tr)
        avg.append(m)
        trs.append(b_tr)

    score = sum(avg) / n_classes
    return score, trs 
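For reference, on 2-D binary input like t_msk above, jaccard_similarity_score averages the per-row Jaccard scores (rows with an empty union count as 1.0), so the threshold search is maximizing mean per-scan-line overlap. A quick check with made-up 2x4 masks:

import numpy as np
from sklearn.metrics import jaccard_similarity_score  # pre-0.23 scikit-learn

t = np.array([[1, 1, 0, 0], [0, 0, 0, 1]])
p = np.array([[1, 0, 0, 0], [0, 0, 1, 1]])
# Row Jaccards are 1/2 and 1/2, so the score is 0.5.
print(jaccard_similarity_score(t, p))  # 0.5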
Example #10
Source File: scorer.py    From segmentation-unet-maskrcnn with MIT License
def display_pred(pred_res, true_masks, config, modelName, amt_pred, trs, min_pred_sum, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    trs_str = "trs_none"
    if trs is not None:
        trs_str = "trs_" + '_'.join(str(x) for x in trs)

    nothing_saved = True
    for p in range(amt_pred):
        for i in range(config.NUM_CLASSES):
            pred = pred_res[p, :, :, i]
            true_mask = true_masks[p, :, :, i]

            sum_pred = np.sum(pred)
            sum_mask = np.sum(true_mask)  # kept for the stricter (disabled) filter below
            if sum_pred > min_pred_sum:  # optionally also require sum_mask > min_pred_sum
                jk = jaccard_similarity_score(true_mask, pred)
                fn = os.path.join(output_folder, "{4}{0}_p{1}_cl{2}_{3}.png".format(modelName, p, i, trs_str, jk))
                plt.imsave(fn, pred, cmap='hot')

                fn_tr = os.path.join(output_folder, "{4}{0}_p{1}_TRUE_cl{2}_{3}.png".format(modelName, p, i, trs_str, jk))
                plt.imsave(fn_tr, true_mask, cmap='hot')

                nothing_saved = False

    if nothing_saved:
        print("No prediction satisfied sum_pred > min_pred_sum; nothing saved. min_pred_sum:", min_pred_sum)
Example #11
Source File: similarity.py    From nlp_research with MIT License
def similarity(self, query, type):
        assert self.corpus is not None, "self.corpus can't be None"
        ret = []
        if type == 'cosine':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = cosine_similarity(item, query)
                ret.append(sim[0][0])
        elif type == 'manhattan':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = manhattan_distances(item, query)
                ret.append(sim[0][0])
        elif type == 'euclidean':
            query = self.get_vector(query)
            for item in self.corpus_vec:
                sim = euclidean_distances(item, query)
                ret.append(sim[0][0])
        # elif type == 'jaccard':  # disabled: jaccard_similarity_score expects
        #     query = self.get_vector(query)  # label vectors, not dense embeddings
        #     for item in self.corpus_vec:
        #         sim = jaccard_similarity_score(item, query)
        #         ret.append(sim)
        elif type == 'bm25':
            query = query.split()
            ret = self.bm25_model.get_scores(query)
        else:
            raise ValueError('similarity type error: %s' % type)
        return ret
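If a Jaccard branch were wanted here, a token-set Jaccard fits text queries better than calling jaccard_similarity_score on dense embedding vectors; a hypothetical sketch, not part of nlp_research:

def token_jaccard(a, b):
    """Jaccard similarity of two whitespace-tokenized strings."""
    sa, sb = set(a.split()), set(b.split())
    union = sa | sb
    return len(sa & sb) / len(union) if union else 1.0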
Example #12
Source File: score_alignments.py    From policy_diffusion with MIT License
def jaccard_coefficient(left, right):
    jaccard_scores = jaccard_similarity_score(left, right)
    return jaccard_scores
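Worth knowing when reading this wrapper: on flat 1-D label arrays, jaccard_similarity_score reduces to plain accuracy, one reason it was deprecated in favor of jaccard_score. A quick check:

import numpy as np
from sklearn.metrics import accuracy_score, jaccard_similarity_score

y_true = np.array([0, 1, 2, 2])
y_pred = np.array([0, 2, 2, 2])
assert jaccard_similarity_score(y_true, y_pred) == accuracy_score(y_true, y_pred)  # both 0.75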
Example #13
Source File: utils.py    From DRFNS with MIT License
def ComputeMetrics(prob, batch_labels, p1, p2, rgb=None, save_path=None, ind=0):
    """
    Compute all metrics between a probability map and its corresponding label.
    If an RGB image is also given, extra visualization images are saved to save_path.
    """
    GT = label(batch_labels.copy())
    PRED = PostProcess(prob, p1, p2)
    # PRED = label((prob > 0.5).astype('uint8'))
    lbl = GT.copy()
    pred = PRED.copy()
    aji = AJI_fast(lbl, pred)
    lbl[lbl > 0] = 1    # collapse instance labels to a binary mask
    pred[pred > 0] = 1
    l, p = lbl.flatten(), pred.flatten()
    acc = accuracy_score(l, p)
    roc = roc_auc_score(l, p)
    jac = jaccard_similarity_score(l, p)  # equals accuracy on flat binary vectors
    f1 = f1_score(l, p)
    recall = recall_score(l, p)
    precision = precision_score(l, p)
    if rgb is not None:
        xval_n = join(save_path, "xval_{}.png").format(ind)
        yval_n = join(save_path, "yval_{}.png").format(ind)
        prob_n = join(save_path, "prob_{}.png").format(ind)
        pred_n = join(save_path, "pred_{}.png").format(ind)
        c_gt_n = join(save_path, "C_gt_{}.png").format(ind)
        c_pr_n = join(save_path, "C_pr_{}.png").format(ind)

        imsave(xval_n, rgb)
        imsave(yval_n, color_bin(GT))
        imsave(prob_n, prob)
        imsave(pred_n, color_bin(PRED))
        imsave(c_gt_n, add_contours(rgb, GT))
        imsave(c_pr_n, add_contours(rgb, PRED))

    return acc, roc, jac, recall, precision, f1, aji 
Example #14
Source File: test_metrics.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_jaccard_similarity_score(self):
        result = self.df.metrics.jaccard_similarity_score()
        expected = metrics.jaccard_similarity_score(self.target, self.pred)
        self.assertEqual(result, expected)

        result = self.df.metrics.jaccard_similarity_score(normalize=False)
        expected = metrics.jaccard_similarity_score(self.target, self.pred, normalize=False)
        self.assertEqual(result, expected) 
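For context, a hedged sketch of the pandas-ml ModelFrame workflow such tests assume (as documented by pandas-ml; an older scikit-learn that still ships jaccard_similarity_score is required):

import pandas_ml as pdml
from sklearn import datasets

df = pdml.ModelFrame(datasets.load_iris())
estimator = df.svm.LinearSVC()
df.fit(estimator)
df.predict(estimator)
print(df.metrics.jaccard_similarity_score())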
Example #15
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_multilabel_jaccard_similarity_score():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    # size(y1 \inter y2) = [1, 2]
    # size(y1 \union y2) = [2, 2]

    assert_equal(jaccard_similarity_score(y1, y2), 0.75)
    assert_equal(jaccard_similarity_score(y1, y1), 1)
    assert_equal(jaccard_similarity_score(y2, y2), 1)
    assert_equal(jaccard_similarity_score(y2, np.logical_not(y2)), 0)
    assert_equal(jaccard_similarity_score(y1, np.logical_not(y1)), 0)
    assert_equal(jaccard_similarity_score(y1, np.zeros(y1.shape)), 0)
    assert_equal(jaccard_similarity_score(y2, np.zeros(y1.shape)), 0)