Python sklearn.metrics.cohen_kappa_score() Examples

The following are 22 code examples of sklearn.metrics.cohen_kappa_score(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.metrics.
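Before the project examples, a minimal self-contained sketch of the basic call and the optional weights parameter that several examples rely on (the label vectors below are invented for illustration, not taken from any project):

from sklearn.metrics import cohen_kappa_score

# Two label sequences for the same items, e.g. two annotators or y_true/y_pred.
rater_a = [0, 1, 1, 2, 2, 0, 1, 2]
rater_b = [0, 1, 2, 2, 2, 0, 0, 2]

print(cohen_kappa_score(rater_a, rater_b))                       # unweighted
print(cohen_kappa_score(rater_a, rater_b, weights='linear'))     # linear weights
print(cohen_kappa_score(rater_a, rater_b, weights='quadratic'))  # quadratic weights

Unweighted kappa treats all disagreements equally; linear and quadratic weights penalize disagreements by how far apart the two labels are, which is why the ordinal-grading examples below use weights='quadratic'.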
Example #1
Source File: cnn.py    From Price_Prediction_LOB with MIT License
def evaluate(source, source_batch):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    for i in range(len(source_batch)):
        data, targets = get_batch(source, source_batch, i)
        outputs = model(data)
        total_loss += len(targets) * criterion(outputs, targets).data
        _, predicted = torch.max(outputs, 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
    val_loss = total_loss.item() / np.size(source_batch)
    # Make report for the classifier
    report = classification_report(y_true, y_pred, target_names=classes)
    kappa = cohen_kappa_score(y_true, y_pred)
    return val_loss, kappa, report

Example #2
Source File: rnn.py    From Price_Prediction_LOB with MIT License
def evaluate(source, source_batch):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    hidden = model.init_hidden(args.bsz)
    for i in range(len(source_batch)):

        data, targets = get_batch(source, source_batch, i)
        output, hidden = model(data, hidden)
        total_loss += len(targets) * criterion(output[-1], targets).data
        _, predicted = torch.max(output[-1], 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
        hidden = repackage_hidden(hidden)
    val_loss = total_loss.item() / np.size(source_batch)
    # Make report for the classifier
    report = classification_report(y_true, y_pred, target_names=classes)
    kappa = cohen_kappa_score(y_true, y_pred)
    return val_loss, kappa, report

Example #3
Source File: TransferLearning_reg.py    From Intelligent-Projects-Using-Python with MIT License
def inference_validation(self,test_X,test_y,model_save_dest,n_class=5,folds=5):
		print(test_X.shape,test_y.shape)
		pred = np.zeros(test_X.shape[0])
		for k in range(1,folds + 1):
			print(f'running inference on fold: {k}')
			model = keras.models.load_model(model_save_dest[k])
			pred = pred + model.predict(test_X)[:,0]
			print(pred.shape)
			print(pred)
		pred = pred/float(folds)
		pred_class = np.round(pred)
		pred_class = np.array(pred_class,dtype=int)
		pred_class = list(map(lambda x:4 if x > 4 else x,pred_class))
		pred_class = list(map(lambda x:0 if x < 0 else x,pred_class))
		act_class = test_y 
		accuracy = np.sum([pred_class == act_class])*1.0/len(test_X)
		kappa = cohen_kappa_score(pred_class,act_class,weights='quadratic')
		return pred_class,accuracy,kappa 
Example #4
Source File: cnn_class.py    From eyenet with MIT License
def predict(self):
        """
        Predicts on the test set and computes precision, recall, F1 score,
        Cohen's kappa, and quadratic weighted kappa.

        INPUT
            self.model: trained Keras model

        OUTPUT
            Precision, recall, F1 score, Cohen's kappa, and quadratic weighted kappa
        """
        predictions = self.model.predict(self.X_test)
        predictions = np.argmax(predictions, axis=1)

        # predictions[predictions >=1] = 1 # Remove when non binary classifier

        self.y_test = np.argmax(self.y_test, axis=1)

        precision = precision_score(self.y_test, predictions, average="micro")
        recall = recall_score(self.y_test, predictions, average="micro")
        f1 = f1_score(self.y_test, predictions, average="micro")
        cohen_kappa = cohen_kappa_score(self.y_test, predictions)
        quad_kappa = kappa(self.y_test, predictions, weights='quadratic')
        return precision, recall, f1, cohen_kappa, quad_kappa 
Example #5
Source File: cnn_class.py    From AI_in_Medicine_Clinical_Imaging_Classification with MIT License
def predict(self):
        """
        Predicts on the test set and computes precision, recall, F1 score,
        Cohen's kappa, and quadratic weighted kappa.

        INPUT
            self.model: trained Keras model

        OUTPUT
            Precision, recall, F1 score, Cohen's kappa, and quadratic weighted kappa
        """
        predictions = self.model.predict(self.X_test)
        predictions = np.argmax(predictions, axis=1)

        # predictions[predictions >=1] = 1 # Remove when non binary classifier

        self.y_test = np.argmax(self.y_test, axis=1)

        precision = precision_score(self.y_test, predictions, average="micro")
        recall = recall_score(self.y_test, predictions, average="micro")
        f1 = f1_score(self.y_test, predictions, average="micro")
        cohen_kappa = cohen_kappa_score(self.y_test, predictions)
        quad_kappa = kappa(self.y_test, predictions, weights='quadratic')
        return precision, recall, f1, cohen_kappa, quad_kappa 
Example #6
Source File: metrics.py    From MultiPlanarUNet with MIT License
def class_wise_kappa(true, pred, n_classes=None, ignore_zero=True):
    from sklearn.metrics import cohen_kappa_score
    if n_classes is None:
        classes = np.unique(true)
    else:
        classes = np.arange(max(2, n_classes))
    # Ignore background class?
    if ignore_zero:
        classes = classes[np.where(classes != 0)]

    # Calculate kappa for all targets
    kappa_scores = np.empty(shape=classes.shape, dtype=np.float32)
    kappa_scores.fill(np.nan)
    for idx, _class in enumerate(classes):
        s1 = true == _class
        s2 = pred == _class

        if np.any(s1) or np.any(s2):
            kappa_scores[idx] = cohen_kappa_score(s1, s2)
    return kappa_scores 
Example #7
Source File: labeled_reviews_comparator.py    From yelp with GNU Lesser General Public License v2.1
def toy_cohens_kappa():
    # rater1 = [1, 1, 1, 0]
    # rater2 = [1, 1, 0, 0]
    # rater3 = [0, 1, 1]
    rater1 = ['s', 's', 's', 'g', 'u']
    rater2 = ['s', 's', 'g', 'g', 's']

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))] # + [
                   # [2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))

    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2))) 
Example #8
Source File: metrics.py    From minetorch with MIT License
def _kappa_score(self):
        png_file = self.scalars(
            {'kappa_score': cohen_kappa_score(self.targets, self.predicts, weights='quadratic')}, 'kappa_score'
        )

        if png_file:
            self.update_sheet('kappa_score', {'raw': png_file, 'processor': 'upload_image'}) 
Example #9
Source File: test_classification.py    From twitter-stock-recommendation with MIT License
def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)

    # Weighting example: none, linear, quadratic.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 50 + [1] * 40 + [2] * 10)
    assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2, weights="quadratic"), .9541, decimal=4) 
Example #10
Source File: cnn.py    From Price_Prediction_LOB with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    start_time = time.time()
    for batch, i in enumerate(range(len(train_batch))):
        data, targets = get_batch(train_data, train_batch, i)
        model.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.data

        _, predicted = torch.max(outputs, 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())

        if (batch + 1) % args.log_interval == 0:
            cur_loss = total_loss.item() / (batch + 1)
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
                    'loss {:5.2f}'.format(
                epoch, batch + 1, len(train_batch), lr,
                elapsed * 1000 / args.log_interval, cur_loss))
            start_time = time.time()
    # compute Cohen's Kappa
    kappa = cohen_kappa_score(y_true, y_pred)
    return total_loss.item() / (batch + 1), kappa 
Example #11
Source File: rnn.py    From Price_Prediction_LOB with MIT License
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    start_time = time.time()
    hidden = model.init_hidden(args.bsz)
    for batch, i in enumerate(range(len(train_batch))):

        data, targets = get_batch(train_data, train_batch, i)
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output[-1], targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data

        _, predicted = torch.max(output[-1], 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())

        if (batch + 1) % args.log_interval == 0:
            cur_loss = total_loss.item() / (batch + 1)
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
                    'loss {:5.2f}'.format(
                epoch, batch + 1, len(train_batch), lr,
                elapsed * 1000 / args.log_interval, cur_loss))
            start_time = time.time()
    # compute Cohen's Kappa
    kappa = cohen_kappa_score(y_true, y_pred)
    return total_loss.item() / (batch + 1), kappa 
Example #12
Source File: labeled_reviews_comparator.py    From yelp with GNU Lesser General Public License v2.1
def cohens_kappa():

    data_folder = '/Users/fpena/UCC/Thesis/datasets/context/manuallyLabeledReviews/'

    business_type = Constants.ITEM_TYPE
    file_name = data_folder + '%s_%s_reviews.json'

    labelers = [
        # 'francisco',
        'diego',
        'mesut',
        'rohit',
    ]

    all_records = [
        load_data(file_name % (labeler, business_type)) for labeler in labelers
    ]

    rater1 = [record['review_type'] for record in all_records[0]]
    rater2 = [record['review_type'] for record in all_records[1]]
    rater3 = [record['review_type'] for record in all_records[2]]

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))] + [
                   [2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("Observed agreement " + str(ratingtask.avg_Ao()))
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))

    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater3)))
    print("sklearn kappa " + str(cohen_kappa_score(rater2, rater3))) 
Example #13
Source File: metrics.py    From mimic3-benchmarks with MIT License
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)

    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)

    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins,
                                      weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)

    if verbose:
        print("Mean absolute deviation (MAD) = {}".format(mad))
        print("Mean squared error (MSE) = {}".format(mse))
        print("Mean absolute percentage error (MAPE) = {}".format(mape))
        print("Cohen kappa score = {}".format(kappa))

    return {"mad": mad,
            "mse": mse,
            "mape": mape,
            "kappa": kappa} 
Example #14
Source File: TrainOneClassifier.py    From kaggle-rsna18 with MIT License
def calculate_metrics(val_results_dict, y_pred, y_val, suffix=""): 
    tmp_kappa_list = []
    tmp_accur_list = [] 
    tmp_f1_list = [] 
    tmp_cm_list = []
    y_val = utils.to_categorical(y_val)[:,-1]
    for each_threshold in np.linspace(0.1, 0.9, 17): 
        tmp_pred = [1 if _ >= each_threshold else 0 for _ in y_pred]
        tmp_kappa_list.append(cohen_kappa_score(tmp_pred, y_val))
        tmp_accur_list.append(accuracy_score(tmp_pred, y_val)) 
        tmp_f1_list.append(f1_score(tmp_pred, y_val))
        tmp_cm_list.append(competitionMetric(tmp_pred, y_val))
    auroc = round(roc_auc_score(y_val, y_pred), 3)
    kappa = round(np.max(tmp_kappa_list), 3)
    accur = round(np.max(tmp_accur_list), 3) 
    cm = round(np.max(tmp_cm_list), 3)
    f1 = round(np.max(tmp_f1_list), 3) 
    val_results_dict["auc{}".format(suffix)].append(auroc)
    val_results_dict["kap{}".format(suffix)].append(kappa)
    val_results_dict["acc{}".format(suffix)].append(accur) 
    val_results_dict["f1{}".format(suffix)].append(f1) 
    val_results_dict["cm{}".format(suffix)].append(cm)
    kappa_threshold = np.linspace(0.1,0.9,17)[tmp_kappa_list.index(np.max(tmp_kappa_list))]
    accur_threshold = np.linspace(0.1,0.9,17)[tmp_accur_list.index(np.max(tmp_accur_list))]
    f1_threshold = np.linspace(0.1,0.9,17)[tmp_f1_list.index(np.max(tmp_f1_list))]
    cm_threshold = np.linspace(0.1,0.9,17)[tmp_cm_list.index(np.max(tmp_cm_list))]
    val_results_dict["threshold_kap{}".format(suffix)].append(round(kappa_threshold, 2))
    val_results_dict["threshold_acc{}".format(suffix)].append(round(accur_threshold, 2))
    val_results_dict["threshold_f1{}".format(suffix)].append(round(f1_threshold, 2))
    val_results_dict["threshold_cm{}".format(suffix)].append(round(cm_threshold, 2))
    return val_results_dict 
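A stripped-down hedged sketch of the kappa part of the threshold sweep above (the probabilities and labels are invented):

import numpy as np
from sklearn.metrics import cohen_kappa_score

y_prob = np.array([0.2, 0.8, 0.55, 0.3, 0.9, 0.45])  # predicted probabilities
y_val = np.array([0, 1, 1, 0, 1, 0])                  # binary ground truth

thresholds = np.linspace(0.1, 0.9, 17)
kappas = [cohen_kappa_score((y_prob >= t).astype(int), y_val) for t in thresholds]
best = int(np.argmax(kappas))
print(round(max(kappas), 3), 'at threshold', round(thresholds[best], 2))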
Example #15
Source File: TrainClassifierEnsemble.py    From kaggle-rsna18 with MIT License
def calculate_metrics(val_results_dict, y_pred, y_val, suffix=""): 
    tmp_kappa_list = []
    tmp_accur_list = [] 
    tmp_f1_list = [] 
    tmp_cm_list = []
    y_val = utils.to_categorical(y_val)[:,-1]
    for each_threshold in np.linspace(0.1, 0.9, 17): 
        tmp_pred = [1 if _ >= each_threshold else 0 for _ in y_pred]
        tmp_kappa_list.append(cohen_kappa_score(tmp_pred, y_val))
        tmp_accur_list.append(accuracy_score(tmp_pred, y_val)) 
        tmp_f1_list.append(f1_score(tmp_pred, y_val))
        tmp_cm_list.append(competitionMetric(tmp_pred, y_val))
    auroc = round(roc_auc_score(y_val, y_pred), 3)
    kappa = round(np.max(tmp_kappa_list), 3)
    accur = round(np.max(tmp_accur_list), 3) 
    cm = round(np.max(tmp_cm_list), 3)
    f1 = round(np.max(tmp_f1_list), 3) 
    val_results_dict["auc{}".format(suffix)].append(auroc)
    val_results_dict["kap{}".format(suffix)].append(kappa)
    val_results_dict["acc{}".format(suffix)].append(accur) 
    val_results_dict["f1{}".format(suffix)].append(f1) 
    val_results_dict["cm{}".format(suffix)].append(cm)
    kappa_threshold = np.linspace(0.1,0.9,17)[tmp_kappa_list.index(np.max(tmp_kappa_list))]
    accur_threshold = np.linspace(0.1,0.9,17)[tmp_accur_list.index(np.max(tmp_accur_list))]
    f1_threshold = np.linspace(0.1,0.9,17)[tmp_f1_list.index(np.max(tmp_f1_list))]
    cm_threshold = np.linspace(0.1,0.9,17)[tmp_cm_list.index(np.max(tmp_cm_list))]
    val_results_dict["threshold_kap{}".format(suffix)].append(round(kappa_threshold, 2))
    val_results_dict["threshold_acc{}".format(suffix)].append(round(accur_threshold, 2))
    val_results_dict["threshold_f1{}".format(suffix)].append(round(f1_threshold, 2))
    val_results_dict["threshold_cm{}".format(suffix)].append(round(cm_threshold, 2))
    return val_results_dict 
Example #16
Source File: mymetrics.py    From hyperspectral_deeplearning_review with GNU General Public License v3.0
def reports(y_pred, y_test):
    classification = classification_report(y_test, y_pred)
    oa = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    each_acc, aa = AA_andEachClassAccuracy(confusion)
    kappa = cohen_kappa_score(y_test, y_pred)
    return classification, confusion, np.array([oa, aa, kappa] + list(each_acc)) * 100 
Example #17
Source File: TransferLearning_ffd.py    From Intelligent-Projects-Using-Python with MIT License
def main(self):
        start_time = time.time()
        print('Data Processing..')
        self.num_class = len(self.class_folders)
        model_to_store_path,class_dict = self.train_model(self.train_dir,self.val_dir,n_fold=self.folds,batch_size=self.batch_size,
                                                        epochs=self.epochs,dim=self.dim,lr=self.lr,model=self.model)
        print("Model saved to dest:",model_to_store_path)

        # Validation: evaluate the results
        
        folder_path = Path(f'{self.val_dir}')
        val_results_df = self.inference(model_to_store_path,folder_path,class_dict,self.dim)
        val_results_path = f'{self.outdir}/val_results.csv'
        val_results_df.to_csv(val_results_path,index=False)
        print(f'Validation results saved at : {val_results_path}') 
        pred_class_index = np.array(val_results_df['pred_class_index'].values)
        actual_class_index = np.array(val_results_df['actual_class_index'].values)
        print(pred_class_index)
        print(actual_class_index)
        accuracy = np.mean(actual_class_index == pred_class_index)
        kappa = cohen_kappa_score(pred_class_index,actual_class_index,weights='quadratic')
        #print("-----------------------------------------------------")
        print(f'Validation Accuracy: {accuracy}')
        print(f'Validation Quadratic Kappa Score: {kappa}')
        #print("-----------------------------------------------------")
        #print("Processing Time",time.time() - start_time,' secs') 
Example #18
Source File: TransferLearning.py    From Intelligent-Projects-Using-Python with MIT License
def inference_validation(self,test_X,test_y,model_save_dest,n_class=5,folds=5):
		pred = np.zeros((len(test_X),n_class))

		for k in range(1,folds + 1):
			model = keras.models.load_model(model_save_dest[k])
			pred = pred + model.predict(test_X)
		pred = pred/(1.0*folds) 
		pred_class = np.argmax(pred,axis=1) 
		act_class = np.argmax(test_y,axis=1)
		accuracy = np.sum([pred_class == act_class])*1.0/len(test_X)
		kappa = cohen_kappa_score(pred_class,act_class,weights='quadratic')
		return pred_class,accuracy,kappa 
Example #19
Source File: test_classification.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)

    # Weighting example: none, linear, quadratic.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 50 + [1] * 40 + [2] * 10)
    assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2,
                        weights="linear"), 0.9412, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2,
                        weights="quadratic"), 0.9541, decimal=4) 
Example #20
Source File: metrics_utils.py    From ludwig with Apache License 2.0
def kappa_score(self):
        return metrics.cohen_kappa_score(self.conditions, self.predictions) 
Example #21
Source File: metrics.py    From kaggle-aptos2019-blindness-detection with MIT License
def quadratic_weighted_kappa(y_pred, y_true):
    if torch.is_tensor(y_pred):
        y_pred = y_pred.data.cpu().numpy()
    if torch.is_tensor(y_true):
        y_true = y_true.data.cpu().numpy()
    if y_pred.shape[1] == 1:
        y_pred = y_pred[:, 0]
    else:
        y_pred = np.argmax(y_pred, axis=1)
    return metrics.cohen_kappa_score(y_pred, y_true, weights='quadratic') 
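A small usage sketch for quadratic_weighted_kappa above (tensors invented; assumes PyTorch and the helper are in scope):

import torch

# Hypothetical logits for 4 samples over 5 ordinal grades, plus true grades.
logits = torch.tensor([[2.0, 0.1, 0.0, 0.0, 0.0],
                       [0.0, 0.0, 0.0, 3.0, 0.0],
                       [0.0, 0.0, 1.5, 0.0, 0.0],
                       [0.0, 0.0, 0.0, 0.0, 2.2]])
grades = torch.tensor([0, 3, 2, 4])

# The helper converts tensors to numpy and takes argmax over the class axis, so
# this scores the predicted grades [0, 3, 2, 4] against the targets (kappa = 1.0).
print(quadratic_weighted_kappa(logits, grades))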
Example #22
Source File: test_unet.py    From eye-in-the-sky with Apache License 2.0
def conf_matrix(Y_gt, Y_pred, num_classes = 9):
    
    total_pixels = 0
    kappa_sum = 0
    sudo_confusion_matrix = np.zeros((num_classes, num_classes))
   
#    if len(Y_pred.shape) == 3:
#        h,w,c = Y_pred.shape
#        Y_pred = np.reshape(Y_pred, (1,))
 
    n = len(Y_pred)
    
    for i in range(n):
        y_pred = Y_pred[i]
        y_gt = Y_gt[i]
        
        #y_pred_hotcode = hotcode(y_pred)
        #y_gt_hotcode = hotcode(y_gt)
        
        pred = np.reshape(y_pred, (y_pred.shape[0]*y_pred.shape[1], y_pred.shape[2]))
        gt = np.reshape(y_gt, (y_gt.shape[0]*y_gt.shape[1], y_gt.shape[2]))
        
        pred = [i for i in pred]
        gt = [i for i in gt]
        
        pred = to_class_no(pred)
        gt = to_class_no(gt)
        
#        pred.tolist()
#        gt.tolist()

        gt = np.asarray(gt, dtype = 'int32')
        pred = np.asarray(pred, dtype = 'int32')

        conf_matrix = confusion_matrix(gt, pred, labels=[0,1,2,3,4,5,6,7,8])
        
        # Note: kappa is computed over labels 0-7 only, one class fewer than the
        # confusion matrix above; class 8 is excluded from the kappa score.
        kappa = cohen_kappa_score(gt, pred, labels=[0,1,2,3,4,5,6,7])

        pixels = len(pred)
        total_pixels = total_pixels+pixels
        
        sudo_confusion_matrix = sudo_confusion_matrix + conf_matrix
        
        kappa_sum = kappa_sum + kappa

    final_confusion_matrix = sudo_confusion_matrix
    
    final_kappa = kappa_sum/n

    return final_confusion_matrix, final_kappa