Python sklearn.metrics.f1_score() Examples

The following are 30 code examples showing how to use sklearn.metrics.f1_score(). They are extracted from open source projects; you can go to the original project or source file by following the links above each example.


You may also want to check out all the available functions and classes of the module sklearn.metrics.
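
Before diving into the project code, here is a minimal, self-contained sketch of the call itself (toy labels, not taken from any project below) showing the common settings of the average parameter:

from sklearn.metrics import f1_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

print(f1_score(y_true, y_pred, average='macro'))     # unweighted mean of per-class F1
print(f1_score(y_true, y_pred, average='micro'))     # F1 from global TP/FP/FN counts
print(f1_score(y_true, y_pred, average='weighted'))  # per-class F1 weighted by class support
print(f1_score(y_true, y_pred, average=None))        # array of per-class F1 scores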

Example 1
Project: edge2vec   Author: RoyZhengGao   File: multi_class_classification.py    License: BSD 3-Clause "New" or "Revised" License
def multi_class_classification(data_X,data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''

    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split( data_X, data_Y, test_size=0.4, random_state=0) 
    clf = svc.fit(data_X, data_Y) #svm
    # array = svc.coef_
    # print array
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print "accuracy",metrics.accuracy_score(data_Y, predicted)
    print "f1 score macro",metrics.f1_score(data_Y, predicted, average='macro') 
    print "f1 score micro",metrics.f1_score(data_Y, predicted, average='micro') 
    print "precision score",metrics.precision_score(data_Y, predicted, average='macro') 
    print "recall score",metrics.recall_score(data_Y, predicted, average='macro') 
    print "hamming_loss",metrics.hamming_loss(data_Y, predicted)
    print "classification_report", metrics.classification_report(data_Y, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted)
    # print "log_loss", metrics.log_loss(data_Y, predicted)
    print "zero_one_loss", metrics.zero_one_loss(data_Y, predicted)
    # print "AUC&ROC",metrics.roc_auc_score(data_Y, predicted)
    # print "matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted) 
Example 2
Project: edge2vec   Author: RoyZhengGao   File: link_prediction.py    License: BSD 3-Clause "New" or "Revised" License
def evaluation_analysis(true_label,predicted): 
    '''
    return all metrics results
    '''
    print "accuracy",metrics.accuracy_score(true_label, predicted)
    print "f1 score macro",metrics.f1_score(true_label, predicted, average='macro')     
    print "f1 score micro",metrics.f1_score(true_label, predicted, average='micro') 
    print "precision score",metrics.precision_score(true_label, predicted, average='macro') 
    print "recall score",metrics.recall_score(true_label, predicted, average='macro') 
    print "hamming_loss",metrics.hamming_loss(true_label, predicted)
    print "classification_report", metrics.classification_report(true_label, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted)
    print "log_loss", metrics.log_loss(true_label, predicted)
    print "zero_one_loss", metrics.zero_one_loss(true_label, predicted)
    print "AUC&ROC",metrics.roc_auc_score(true_label, predicted)
    print "matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted) 
Example 3
Project: nettack   Author: danielzuegner   File: GCN.py    License: MIT License
def eval_class(ids_to_eval, model, z_obs):
    """
    Evaluate the model's classification performance.

    Parameters
    ----------
    ids_to_eval: np.array
        The indices of the nodes whose predictions will be evaluated.

    model: GCN
        The model to evaluate.

    z_obs: np.array
        The labels of the nodes in ids_to_eval

    Returns
    -------
    [f1_micro, f1_macro] scores

    """
    test_pred = model.predictions.eval(session=model.session, feed_dict={model.node_ids: ids_to_eval}).argmax(1)
    test_real = z_obs[ids_to_eval]

    return f1_score(test_real, test_pred, average='micro'), f1_score(test_real, test_pred, average='macro') 
Example 4
Project: Attention-Gated-Networks   Author: ozan-oktay   File: utils.py    License: MIT License
def classification_scores(gts, preds, labels):
    accuracy        = metrics.accuracy_score(gts,  preds)
    class_accuracies = []
    for lab in labels: # TODO Fix
        class_accuracies.append(metrics.accuracy_score(gts[gts == lab], preds[gts == lab]))
    class_accuracies = np.array(class_accuracies)

    f1_micro        = metrics.f1_score(gts,        preds, average='micro')
    precision_micro = metrics.precision_score(gts, preds, average='micro')
    recall_micro    = metrics.recall_score(gts,    preds, average='micro')
    f1_macro        = metrics.f1_score(gts,        preds, average='macro')
    precision_macro = metrics.precision_score(gts, preds, average='macro')
    recall_macro    = metrics.recall_score(gts,    preds, average='macro')

    # class wise score
    f1s        = metrics.f1_score(gts,        preds, average=None)
    precisions = metrics.precision_score(gts, preds, average=None)
    recalls    = metrics.recall_score(gts,    preds, average=None)

    confusion = metrics.confusion_matrix(gts,preds, labels=labels)

    #TODO confusion matrix, recall, precision
    return accuracy, f1_micro, precision_micro, recall_micro, f1_macro, precision_macro, recall_macro, confusion, class_accuracies, f1s, precisions, recalls 
Example 5
Project: pytorch_geometric   Author: rusty1s   File: signed_gcn.py    License: MIT License
def test(self, z, pos_edge_index, neg_edge_index):
        """Evaluates node embeddings :obj:`z` on positive and negative test
        edges by computing AUC and F1 scores.

        Args:
            z (Tensor): The node embeddings.
            pos_edge_index (LongTensor): The positive edge indices.
            neg_edge_index (LongTensor): The negative edge indices.
        """
        with torch.no_grad():
            pos_p = self.discriminate(z, pos_edge_index)[:, :2].max(dim=1)[1]
            neg_p = self.discriminate(z, neg_edge_index)[:, :2].max(dim=1)[1]
        pred = (1 - torch.cat([pos_p, neg_p])).cpu()
        y = torch.cat(
            [pred.new_ones((pos_p.size(0))),
             pred.new_zeros(neg_p.size(0))])
        pred, y = pred.numpy(), y.numpy()

        auc = roc_auc_score(y, pred)
        f1 = f1_score(y, pred, average='binary') if pred.sum() > 0 else 0

        return auc, f1 
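
The "if pred.sum() > 0 else 0" guard above sidesteps the case where no positive edges are predicted, which would make the binary F1 ill-defined. In scikit-learn 0.22 and later, the zero_division argument of f1_score handles that case directly; a small sketch (not part of the pytorch_geometric code):

from sklearn.metrics import f1_score

y_true = [1, 0, 1, 1]
y_pred = [0, 0, 0, 0]  # no positive predictions at all

# Report 0.0 instead of raising an UndefinedMetricWarning when precision is undefined.
print(f1_score(y_true, y_pred, average='binary', zero_division=0))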
Example 6
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_classification_2classes_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_small')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None 
Example 7
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_classification_2classes_small_jp():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification',
                lang='jp').fit(X, y, 'test_classification_2classes_small_jp')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None 
Example 8
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_classification_multiclass_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=3,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_multiclass_small')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None 
Example 9
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_classification_2classes_medium():
    X, y = make_classification(n_samples=100000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_medium')

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 4
    assert cls.algorithms[0].best_score is not None 
Example 10
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_classification_2classes_big():
    X, y = make_classification(n_samples=200000,
                               n_features=20,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_big')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 1
    assert cls.algorithms[0].best_score is not None 
Example 11
Project: malss   Author: canard0328   File: test.py    License: MIT License
def test_ndarray():
    data = pd.read_csv('http://faculty.marshall.usc.edu/gareth-james/ISL/Heart.csv',
                       index_col=0, na_values=[''])

    y = data['AHD']
    del data['AHD']

    cls = MALSS('classification').fit(np.array(data), np.array(y),
                                      'test_ndarray')
    cls.generate_module_sample()

    from sklearn.metrics import f1_score
    pred = cls.predict(np.array(data))
    print(f1_score(y, pred, average=None))

    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None 
Example 12
Project: Document-Classifier-LSTM   Author: AlexGidiotis   File: classifier.py    License: MIT License
def f1_score(y_true, y_pred):
	"""
	Compute the micro f(b) score with b=1.
	"""
	y_true = tf.cast(y_true, "float32")
	y_pred = tf.cast(tf.round(y_pred), "float32") # implicit 0.5 threshold via tf.round
	y_correct = y_true * y_pred


	sum_true = tf.reduce_sum(y_true, axis=1)
	sum_pred = tf.reduce_sum(y_pred, axis=1)
	sum_correct = tf.reduce_sum(y_correct, axis=1)


	precision = sum_correct / sum_pred
	recall = sum_correct / sum_true
	f_score = 2 * precision * recall / (precision + recall)
	f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)


	return tf.reduce_mean(f_score) 
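
The Keras metric above thresholds predictions at 0.5 and averages a per-row F1 over the batch. On hard 0/1 multilabel indicator matrices, that per-instance averaging corresponds to sklearn's average='samples' rather than average='micro'; a small NumPy sanity check (a sketch, not part of the project) shows the two side by side:

import numpy as np
from sklearn.metrics import f1_score

# Toy multilabel data: rows are samples, columns are 0/1 label indicators.
y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])
y_pred = np.array([[1, 0, 0], [0, 1, 1], [1, 1, 0]])

print(f1_score(y_true, y_pred, average='samples'))  # per-instance F1 averaged over rows
print(f1_score(y_true, y_pred, average='micro'))    # F1 over all label decisions pooled together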
Example 13
Project: Document-Classifier-LSTM   Author: AlexGidiotis   File: classifier.py    License: MIT License
def load_model(stamp):
	"""
	"""

	json_file = open(stamp+'.json', 'r')
	loaded_model_json = json_file.read()
	json_file.close()
	model = model_from_json(loaded_model_json, {'AttentionWithContext': AttentionWithContext})

	model.load_weights(stamp+'.h5')
	print("Loaded model from disk")

	model.summary()


	adam = Adam(lr=0.001)
	model.compile(loss='binary_crossentropy',
		optimizer=adam,
		metrics=[f1_score])


	return model 
Example 14
Project: Document-Classifier-LSTM   Author: AlexGidiotis   File: hatt_classifier.py    License: MIT License
def f1_score(y_true, y_pred):
	"""
	Compute the micro f(b) score with b=1.
	"""
	y_true = tf.cast(y_true, "float32")
	y_pred = tf.cast(tf.round(y_pred), "float32") # implicit 0.5 threshold via tf.round
	y_correct = y_true * y_pred


	sum_true = tf.reduce_sum(y_true, axis=1)
	sum_pred = tf.reduce_sum(y_pred, axis=1)
	sum_correct = tf.reduce_sum(y_correct, axis=1)


	precision = sum_correct / sum_pred
	recall = sum_correct / sum_true
	f_score = 2 * precision * recall / (precision + recall)
	f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)


	return tf.reduce_mean(f_score) 
Example 15
Project: DeepLearningSmells   Author: tushartushar   File: metrics_util.py    License: Apache License 2.0
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr 
Example 16
Project: DeepLearningSmells   Author: tushartushar   File: metrics_util.py    License: Apache License 2.0
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr 
Example 17
Project: dgl   Author: dmlc   File: train_sampling_unsupervised.py    License: Apache License 2.0
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Evaluate the embeddings with a logistic-regression probe and return micro-F1 on the validation and test nodes, given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()

    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)

    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], labels[train_nids])

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test 
Example 18
Project: PCNN   Author: pencoa   File: pcnn_model.py    License: Apache License 2.0
def run_evaluate(self, test):
        """Evaluates performance on test set

        Args:
            test: dataset that yields tuple of (sentences, relation tags)

        Returns:
            metrics: (dict) metrics["acc"] = 98.4, ...

        """
        y_true, y_pred = [], []
        for data in minibatches(test, self.config.batch_size):
            word_batch, pos1_batch, pos2_batch, pos_batch, y_batch = data
            relations_pred = self.predict_batch(word_batch, pos1_batch, pos2_batch, pos_batch)
            assert len(relations_pred) == len(y_batch)
            y_true += y_batch
            y_pred += relations_pred.tolist()

        acc = accuracy_score(y_true, y_pred)
        p   = precision_score(y_true, y_pred, average='macro')
        r   = recall_score(y_true, y_pred, average='macro')
        f1  = f1_score(y_true, y_pred, average='macro')

        return {"acc":acc, "p":p, "r":r, "f1":f1} 
Example 19
Project: Projects   Author: iamshang1   File: tf_san.py    License: MIT License
def score(self,data,labels,batch_size=64):
        '''
        return the micro and macro f-score of predicted labels on given data

        parameters:
          - data: numpy array
            2d numpy array (doc x word ids) of input data
          - labels: numpy array
            1d numpy array of labels for given data
          - batch size: int (default: 64)
            batch size to use during inference
        
        outputs:
            tuple of floats (micro,macro) representing micro and macro f-score
            of predicted labels on given data
        ''' 
        
        y_pred = self.predict(data,batch_size)
        micro = f1_score(labels,y_pred,average='micro')
        macro = f1_score(labels,y_pred,average='macro')
        return micro,macro 
Example 20
Project: Projects   Author: iamshang1   File: tf_cnn.py    License: MIT License
def score(self,data,labels,batch_size=64):
        '''
        return the micro and macro f-score of predicted labels on given data

        parameters:
          - data: numpy array
            2d numpy array (doc x word ids) of input data
          - labels: numpy array
            1d numpy array of labels for given data
          - batch size: int (default: 64)
            batch size to use during inference
        
        outputs:
            tuple of floats (micro,macro) representing micro and macro f-score
            of predicted labels on given data
        ''' 
        
        y_pred = self.predict(data,batch_size)
        micro = f1_score(labels,y_pred,average='micro')
        macro = f1_score(labels,y_pred,average='macro')
        return micro,macro 
Example 21
Project: Projects   Author: iamshang1   File: tf_han.py    License: MIT License
def score(self,data,labels,batch_size=64):
        '''
        return the micro and macro f-score of predicted labels on given data

        parameters:
          - data: numpy array
            3d numpy array (doc x sentence x word ids) of input data
          - labels: numpy array
            1d numpy array of labels for given data
          - batch size: int (default: 64)
            batch size to use during inference
        
        outputs:
            tuple of floats (micro,macro) representing micro and macro f-score
            of predicted labels on given data
        '''  
        
        y_pred = self.predict(data,batch_size)
        micro = f1_score(labels,y_pred,average='micro')
        macro = f1_score(labels,y_pred,average='macro')
        return micro,macro 
Example 22
Project: Projects   Author: iamshang1   File: tf_hisan.py    License: MIT License
def score(self,data,labels,batch_size=64):
        '''
        return the micro and macro f-score of predicted labels on given data

        parameters:
          - data: numpy array
            3d numpy array (doc x sentence x word ids) of input data
          - labels: numpy array
            1d numpy array of labels for given data
          - batch size: int (default: 64)
            batch size to use during inference
        
        outputs:
            tuple of floats (micro,macro) representing micro and macro f-score
            of predicted labels on given data
        '''     
        
        y_pred = self.predict(data,batch_size)
        micro = f1_score(labels,y_pred,average='micro')
        macro = f1_score(labels,y_pred,average='macro')
        return micro,macro 
Example 23
Project: comparable-text-miner   Author: motazsaad   File: textpro.py    License: Apache License 2.0
def evaluate(trueValues, predicted, decimals, note):
	print(note)
	label = 1
	avg = 'weighted'
	a = accuracy_score(trueValues, predicted)
	p = precision_score(trueValues, predicted, pos_label=label, average=avg)
	r = recall_score(trueValues, predicted, pos_label=label, average=avg)
	avg_f1 = f1_score(trueValues, predicted, pos_label=label, average=avg)
	fclasses = f1_score(trueValues, predicted, average=None)
	f1c1 = fclasses[0]; f1c2 = fclasses[1]
	fw = (f1c1 + f1c2)/2.0

	print('accuracy:\t', str(round(a, decimals)))
	print('precision:\t', str(round(p, decimals)))
	print('recall:\t', str(round(r, decimals)))
	print('avg f1:\t', str(round(avg_f1, decimals)))
	print('c1 f1:\t', str(round(f1c1, decimals)))
	print('c2 f1:\t', str(round(f1c2, decimals)))
	print('avg(c1,c2):\t', str(round(fw, decimals)))
	print('------------')

###################################################################################


# split a parallel or comparable corpus into two parts 
Example 24
Project: MCF-3D-CNN   Author: xyj77   File: conv_featuremaps_visualization.py    License: MIT License
def accuracy(y_true, y_pred):        
    # compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))    
    for i in range(len(y_true)): 
        y[i] = np.argmax(y_true[i,:])
        y_[i] = np.argmax(y_pred[i,:])
    cnf_mat = confusion_matrix(y, y_)
    
    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])
    
    # # plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)
    
    
    # compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted') 
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat 
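
Example 24 mixes f1_score with precision_recall_fscore_support(beta=0.5). For reference, f1_score is simply the beta=1 case of the F-beta family, so the three calls below return the same value (a toy sketch, not project code):

from sklearn.metrics import f1_score, fbeta_score, precision_recall_fscore_support

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 1]

print(f1_score(y_true, y_pred, average='macro'))
print(fbeta_score(y_true, y_pred, beta=1, average='macro'))
# precision_recall_fscore_support returns (precision, recall, fbeta, support); index 2 is the F-beta value.
print(precision_recall_fscore_support(y_true, y_pred, beta=1, average='macro')[2])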
Example 25
Project: BERT   Author: yyht   File: test_wsdm_interaction.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy accuracy {} {} f1 {}".format(total_accuracy/i, 
            accuracy, f1))
        return total_accuracy/ i, f1 
Example 26
Project: BERT   Author: yyht   File: eval_wsdm_interaction_test.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        total_loss = 0.0
        pred_prob = []
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                total_loss += eval_result["loss"]
                pred_prob.extend(eval_result["pred_prob"])
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy/i, 
            accuracy, total_loss/i, f1))
        return pred_prob 
Example 27
Project: BERT   Author: yyht   File: eval_oqmrc_test.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        total_loss = 0.0
        pred_prob = []
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                total_loss += eval_result["loss"]
                pred_prob.extend(eval_result["pred_prob"])
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy/i, 
            accuracy, total_loss/i, f1))
        return pred_prob 
Example 28
Project: BERT   Author: yyht   File: eval_oqmrc.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        total_loss = 0.0
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                total_loss += eval_result["loss"]
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy {} accuracy {} loss {}, f1 {}".format(total_accuracy/i, 
            accuracy, total_loss/i, f1))
        return total_accuracy/ i 
Example 29
Project: BERT   Author: yyht   File: test_oqmrc_final.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy accuracy {} {}, f1 {}".format(total_accuracy/i, 
            accuracy, f1))
        return total_accuracy/ i, f1 
Example 30
Project: BERT   Author: yyht   File: test_wsdm.py    License: Apache License 2.0
def eval_fn(result):
        i = 0
        total_accuracy = 0
        label, label_id = [], []
        while True:
            try:
                eval_result = sess.run(result)
                total_accuracy += eval_result["accuracy"]
                label_id.extend(eval_result["label_ids"])
                label.extend(eval_result["pred_label"])
                i += 1
            except tf.errors.OutOfRangeError:
                print("End of dataset")
                break
        f1 = f1_score(label_id, label, average="macro")
        accuracy = accuracy_score(label_id, label)
        print("test accuracy accuracy {} {} f1 {}".format(total_accuracy/i, 
            accuracy, f1))
        return total_accuracy/ i, f1