# Python source code of the visualization utilities

#! /usr/bin/env python
# coding=utf-8
# Copyright (c) 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division

import copy
import logging
import sys
from collections import Counter
from sys import platform

import numpy as np

import ludwig.contrib
from ludwig.constants import TRAINING, VALIDATION

logger = logging.getLogger(__name__)

try:
    import matplotlib as mpl

    if platform == "darwin":  # OS X
        mpl.use('TkAgg')
    import matplotlib.patches as patches
    import matplotlib.path as path
    import matplotlib.patheffects as PathEffects
    import matplotlib.pyplot as plt
    import seaborn as sns
    from matplotlib import ticker
    from matplotlib.lines import Line2D
    from mpl_toolkits.mplot3d import Axes3D
except ImportError:
    logger.error(
        ' matplotlib or seaborn are not installed. '
        'In order to install all visualization dependencies run '
        'pip install ludwig[viz]'
    )
    sys.exit(-1)


# plt.rc('xtick', labelsize='x-large')
# plt.rc('ytick', labelsize='x-large')
# plt.rc('axes', labelsize='x-large')

def learning_curves_plot(
        train_values,
        vali_values,
        metric,
        algorithm_names=None,
        title=None,
        filename=None
):
    """Plot per-epoch training and validation curves of one or more models.

    :param train_values: sequence holding, for each algorithm, the sequence
        of training metric values (one per epoch).
    :param vali_values: sequence aligned with `train_values` holding the
        validation values. It may be None or shorter than `train_values`,
        and single entries may be None or empty: the validation curve is
        then skipped for the affected algorithms.
    :param metric: metric name, shown on the y axis with underscores
        replaced by spaces.
    :param algorithm_names: optional names used as prefix of legend entries.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    # The style only affects axes created after it has been set, so it must
    # be selected before plt.subplots().
    sns.set_style('whitegrid')

    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    num_algorithms = len(train_values)
    if vali_values is None:
        vali_values = []

    # Every algorithm consumes two consecutive palette entries: one for the
    # training curve and one for the validation curve.
    if num_algorithms == 1:
        colors = plt.get_cmap('tab10').colors
    else:
        colors = plt.get_cmap('tab20').colors
    num_colors = len(colors)

    ax.grid(which='both')
    ax.grid(which='minor', alpha=0.5)
    ax.grid(which='major', alpha=0.75)
    ax.set_xlabel('epochs')
    ax.set_ylabel(metric.replace('_', ' '))

    for i in range(num_algorithms):
        if algorithm_names is not None and i < len(algorithm_names):
            name_prefix = algorithm_names[i] + ' '
        else:
            name_prefix = ''

        # Epochs are numbered from 1. Each curve gets its own x values so
        # that series of different lengths can be plotted together.
        train = train_values[i]
        ax.plot(list(range(1, len(train) + 1)), train,
                label=name_prefix + TRAINING,
                # wrap around instead of failing when the palette runs out
                color=colors[(i * 2) % num_colors], linewidth=3)

        vali = vali_values[i] if i < len(vali_values) else None
        if vali is not None and len(vali) > 0:
            ax.plot(list(range(1, len(vali) + 1)), vali,
                    label=name_prefix + VALIDATION,
                    color=colors[(i * 2 + 1) % num_colors], linewidth=3)

    ax.legend()
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def compare_classifiers_plot(
        scores,
        metrics,
        algoritm_names=None,
        adaptive=False,
        decimals=4,
        title=None,
        filename=None
):
    """Draw grouped horizontal bars comparing algorithms on several metrics.

    :param scores: one sequence of values per metric; each sequence holds
        one value per algorithm.
    :param metrics: metric names, used as legend entries (same length as
        `scores`).
    :param algoritm_names: optional labels of the y ticks (one per
        algorithm).
    :param adaptive: if True the x axis is left to autoscale and label
        placement is relative to the largest score; otherwise the x axis is
        fixed to [0, 1].
    :param decimals: number of decimals of the value printed on each bar.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(scores) == len(metrics)
    assert len(scores) > 0

    sns.set_style('whitegrid')
    fig, ax = plt.subplots()

    ax.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax.grid(which=which, alpha=alpha)
    ax.set_xticklabels([], minor=True)

    if title is not None:
        ax.set_title(title)

    # A single metric gets one bar of width 0.4, several metrics share a
    # group that is 0.8 wide in total.
    if len(metrics) > 1:
        bar_width = 0.8 / len(metrics)
        half_group_width = 0.4
    else:
        bar_width = 0.4
        half_group_width = 0.2
    positions = np.arange(len(scores[0]))

    palette = plt.get_cmap('tab10').colors
    if not adaptive:
        ax.set_xlim([0, 1])
        ax.set_xticks(np.linspace(0.0, 1.0, num=21), minor=True)
        ax.set_xticks(np.linspace(0.0, 1.0, num=11))
        upper = 1
    else:
        upper = max([max(score) for score in scores])

    # y ticks sit in the middle of each group of bars
    ax.set_yticks(positions + half_group_width - bar_width / 2)
    tick_labels = '' if algoritm_names is None else algoritm_names
    ax.set_yticklabels(tick_labels)
    ax.invert_yaxis()  # labels read top-to-bottom

    value_format = '{:.' + str(decimals) + 'f}'
    for metric_idx, metric_name in enumerate(metrics):
        bar_positions = positions + (metric_idx * bar_width)
        ax.barh(bar_positions, scores[metric_idx], bar_width,
                label=metric_name, color=palette[metric_idx])

        for alg_idx, value in enumerate(scores[metric_idx]):
            # Short bars get the value printed to their right, longer ones
            # get it right-aligned inside the bar.
            if value < upper * (0.025 * decimals + 0.1):
                text_x, align = value + upper * 0.01, 'left'
            else:
                text_x, align = value - upper * 0.01, 'right'
            value_text = ax.text(
                text_x,
                positions[alg_idx] + (metric_idx * bar_width),
                value_format.format(value),
                color='white',
                fontweight='bold',
                verticalalignment='center',
                horizontalalignment=align
            )
            value_text.set_path_effects(
                [PathEffects.withStroke(linewidth=3, foreground='black')]
            )

    plt.setp(ax.get_xminorticklabels(), visible=False)

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def compare_classifiers_line_plot(
        xs,
        scores,
        metric,
        algorithm_names=None,
        title=None,
        filename=None
):
    """Plot one line per algorithm showing a metric at the positions `xs`.

    :param xs: x positions, also used as tick positions and tick labels
        (the axis is labelled 'k').
    :param scores: one sequence of y values per algorithm.
    :param metric: label of the y axis.
    :param algorithm_names: optional legend names; algorithms without a
        name are labelled 'Algorithm <index>'.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')
    palette = plt.get_cmap('tab10').colors

    fig, ax = plt.subplots()

    ax.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax.grid(which=which, alpha=alpha)

    if title is not None:
        ax.set_title(title)

    ax.set_xticks(xs)
    ax.set_xticklabels(xs)
    ax.set_xlabel('k')
    ax.set_ylabel(metric)

    for idx, values in enumerate(scores):
        has_name = algorithm_names is not None and idx < len(algorithm_names)
        if has_name:
            line_label = algorithm_names[idx]
        else:
            line_label = 'Algorithm {}'.format(idx)
        ax.plot(xs, values, label=line_label, color=palette[idx],
                linewidth=3, marker='o')

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def compare_classifiers_multiclass_multimetric_plot(
        scores,
        metrics,
        labels=None,
        title=None,
        filename=None
):
    """Draw grouped vertical bars with one group per class.

    :param scores: one sequence of per-class values for each metric.
    :param metrics: metric names used as legend entries, indexed like
        `scores`.
    :param labels: optional class labels for the x ticks; the class indices
        are used when omitted.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(scores) > 0

    sns.set_style('whitegrid')

    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    num_series = len(scores)
    width = 0.9 / num_series
    ticks = np.arange(len(scores[0]))

    colors = plt.get_cmap('tab10').colors
    ax.set_xlabel('class')
    # Bar i of a group is centered on tick + i * width, hence the middle of
    # the group lies (num_series - 1) * width / 2 to the right of the tick.
    # (With three series this is exactly `ticks + width`.)
    ax.set_xticks(ticks + width * (num_series - 1) / 2.0)
    if labels is not None:
        ax.set_xticklabels(labels, rotation=90)
    else:
        ax.set_xticklabels(ticks, rotation=90)

    for i, score in enumerate(scores):
        ax.bar(ticks + i * width, score, width, label=metrics[i],
               # wrap around instead of failing when the palette runs out
               color=colors[i % len(colors)])

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def radar_chart(
        ground_truth,
        predictions,
        algorithms=None,
        log_scale=False,
        title=None,
        filename=None
):
    """Draw a radar (polar) chart of ground truth vs. predicted values.

    Only the first 10 entries of every input are used; they are reordered
    by decreasing ground truth value before being drawn.

    :param ground_truth: array-like of ground truth values per class.
    :param predictions: sequence with one array-like of predicted values
        per algorithm, aligned with `ground_truth`.
    :param algorithms: optional legend names of the algorithms; missing
        names fall back to 'Algorithm <index>'.
    :param log_scale: accepted for backward compatibility, currently not
        used by the implementation.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')

    # np.asarray lets callers pass plain lists: negation and fancy indexing
    # below need arrays.
    ground_truth = np.asarray(ground_truth)[0:10]
    predictions = [np.asarray(pred)[0:10] for pred in predictions]

    gt_argsort = np.argsort(-ground_truth)  # sort decreasing
    logger.info(gt_argsort)
    ground_truth = ground_truth[gt_argsort]
    predictions = [pred[gt_argsort] for pred in predictions]

    maximum = max([max(ground_truth)] + [max(p) for p in predictions])

    # Use a dedicated figure so the chart never lands on a figure left open
    # by a previous plot, and set the title on the polar axes itself.
    fig = plt.figure()
    ax = fig.add_subplot(111, polar=True)
    if title is not None:
        ax.set_title(title)

    ax.set_theta_zero_location('N')
    ax.set_theta_direction(-1)
    ax.set_rmax(maximum)
    ax.set_rlabel_position(305)
    ax.set_ylabel('Probability')
    ax.grid(True)

    colors = plt.get_cmap('tab10').colors

    num_classes = len(ground_truth)

    # One angular tick per class, evenly spread over the circle (radians)
    t = np.arange(0, 2 * np.pi, 2 * np.pi / num_classes)
    ax.set_xticks(t)
    ax.set_xticklabels(np.arange(0, num_classes))

    def draw_polygon(values, label, color='grey'):
        # Close the polygon by repeating its first vertex
        points = [(x, y) for x, y in zip(t, values)]
        points.append(points[0])
        points = np.array(points)

        codes = [path.Path.MOVETO, ] + \
                [path.Path.LINETO, ] * (len(values) - 1) + \
                [path.Path.CLOSEPOLY]
        _path = path.Path(points, codes)
        # translucent filling
        _patch = patches.PathPatch(_path, fill=True, color=color, linewidth=0,
                                   alpha=.2)
        ax.add_patch(_patch)
        # solid outline
        _patch = patches.PathPatch(_path, fill=False, color=color, linewidth=3)
        ax.add_patch(_patch)

        # Markers at the value points; this line also carries the legend
        # label of the polygon.
        ax.plot(points[:, 0], points[:, 1], linewidth=3, marker='o',
                fillstyle='full',
                markerfacecolor='white',
                markeredgecolor=color,
                markeredgewidth=2,
                color=color, zorder=10, label=label)

    draw_polygon(ground_truth, 'Ground Truth')

    for i, alg_predictions in enumerate(predictions):
        if algorithms is not None and i < len(algorithms):
            label = algorithms[i]
        else:
            label = 'Algorithm {}'.format(i)
        draw_polygon(alg_predictions, label, colors[i % len(colors)])

    ax.legend(frameon=True, loc='upper left')
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def pie(ax, values, **kwargs):
    """Draw a pie on `ax` whose wedges show absolute value and percentage.

    :param ax: object exposing a matplotlib-like `pie` method.
    :param values: wedge values; their sum is used to turn percentages back
        into absolute numbers.
    :param kwargs: forwarded unchanged to `ax.pie`.
    :return: the first element returned by `ax.pie` (the wedges).
    """
    total = sum(values)

    def _count_and_percent(pct):
        # Wedges without a positive share are left unlabelled
        if not pct > 0:
            return ''
        return '{:0.0f}\n({:0.1f}%)'.format(pct * total / 100, pct)

    wedges, _texts, _autotexts = ax.pie(
        values, autopct=_count_and_percent, **kwargs
    )
    return wedges


def donut(
        inside_values,
        inside_labels,
        outside_values,
        outside_labels,
        outside_groups,
        title=None,
        filename=None
):
    """Draw a two-ring donut chart: groups inside, their members outside.

    :param inside_values: values of the inner ring wedges.
    :param inside_labels: legend labels of the inner wedges, indexed by
        group.
    :param outside_values: values of the outer ring wedges.
    :param outside_labels: legend labels of the outer wedges.
    :param outside_groups: for every outer wedge, the integer index of the
        inner wedge (group) it belongs to.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.

    The palette built below holds 12 colors (4 per group), so at most 3
    inner wedges are supported.

    NOTE(review): colors and legend entries are assigned by walking the
    groups in ascending order, which assumes `outside_values` lists the
    members group by group in that same order -- confirm against callers.
    """
    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    ax.axis('equal')

    width = 0.35
    colors_tab20c = list(plt.get_cmap('tab20c').colors)
    colors_set2 = list(plt.get_cmap('Set2').colors)
    colors_set3 = list(plt.get_cmap('Set3').colors)
    colors_pastel1 = list(plt.get_cmap('Pastel1').colors)

    # Three blocks of four colors: the first color of a block is used for
    # the inner wedge, the remaining three cycle over its outer wedges.
    colors = []
    colors.extend(colors_tab20c[8:12])
    colors.append(colors_set2[5])
    colors.append(colors_set3[11])
    colors.append(colors_set3[1])
    colors.append(colors_pastel1[5])
    colors.extend(colors_tab20c[4:8])

    inside_colors = [colors[x * 4] for x in range(len(inside_values))]

    group_count = Counter(outside_groups)
    # Iterate the groups in a well-defined (ascending) order: the iteration
    # order of a set is not guaranteed, yet colors and legend entries below
    # are matched to the outer wedges purely by position.
    groups = sorted(group_count)

    outside_colors = [colors[(i * 4) + ((j % 3) + 1)]
                      for i in groups
                      for j in range(group_count[i])]

    def text_props():
        # White bold text with a black outline, readable on any wedge color
        return {
            'color': 'w',
            'weight': 'bold',
            'path_effects': [
                PathEffects.withStroke(linewidth=3, foreground='black')
            ]
        }

    outside = pie(ax, outside_values, radius=1, pctdistance=1 - width / 2,
                  colors=outside_colors, startangle=90, counterclock=False,
                  textprops=text_props())
    inside = pie(ax, inside_values, radius=1 - width,
                 pctdistance=1 - (width / 2) / (1 - width),
                 colors=inside_colors, startangle=90, counterclock=False,
                 textprops=text_props())
    # Turn both pies into rings of the same thickness
    plt.setp(inside + outside, width=width, edgecolor='white')

    # Legend order: each group followed by its members
    wedges = []
    labels = []
    so_far = 0
    for i in groups:
        wedges.append(inside[i])
        labels.append(inside_labels[i])
        for _ in range(group_count[i]):
            wedges.append(outside[so_far])
            labels.append(outside_labels[so_far])
            so_far += 1

    ax.legend(wedges, labels, frameon=True)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def confidence_fitlering_plot(
        thresholds,
        accuracies,
        dataset_kepts,
        algorithm_names=None,
        title=None,
        filename=None
):
    """Plot accuracy and data coverage as functions of a confidence threshold.

    :param thresholds: threshold values used as x coordinates.
    :param accuracies: one sequence of accuracies per algorithm, aligned
        with `thresholds`.
    :param dataset_kepts: one sequence per algorithm with the fraction of
        data kept at each threshold.
    :param algorithm_names: optional names used as prefix of legend entries.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(accuracies) == len(dataset_kepts)
    num_algorithms = len(accuracies)

    sns.set_style('whitegrid')

    # Every algorithm consumes two consecutive palette entries: accuracy
    # and data coverage.
    if num_algorithms == 1:
        colors = plt.get_cmap('tab10').colors
    else:  # num_algorithms > 1
        colors = plt.get_cmap('tab20').colors
    num_colors = len(colors)

    y_ticks_minor = np.linspace(0.0, 1.0, num=21)
    y_ticks_major = np.linspace(0.0, 1.0, num=11)
    y_ticks_major_labels = ['{:3.0f}%'.format(y * 100) for y in y_ticks_major]

    fig, ax1 = plt.subplots()

    if title is not None:
        ax1.set_title(title)

    ax1.grid(which='both')
    ax1.grid(which='minor', alpha=0.5)
    ax1.grid(which='major', alpha=0.75)
    # major ticks on every other threshold, minor ticks on all of them
    ax1.set_xticks([x for idx, x in enumerate(thresholds) if idx % 2 == 0])
    ax1.set_xticks(thresholds, minor=True)

    ax1.set_xlim(-0.05, 1.05)
    ax1.set_xlabel('confidence threshold')

    ax1.set_ylim(0, 1.05)
    ax1.set_yticks(y_ticks_major)
    ax1.set_yticklabels(y_ticks_major_labels)
    ax1.set_yticks(y_ticks_minor, minor=True)

    # The twin axes only replicates the percentage scale on the right
    # side; both families of curves are drawn on ax1.
    ax2 = ax1.twinx()

    ax2.set_ylim(0, 1.05)
    ax2.set_yticks(y_ticks_major)
    ax2.set_yticklabels(y_ticks_major_labels)
    ax2.set_yticks(y_ticks_minor, minor=True)

    for i in range(num_algorithms):
        # The prefix already ends with a space (or is empty), so it is
        # concatenated directly: no doubled or leading blank in the legend.
        if algorithm_names is not None and i < len(algorithm_names):
            name_prefix = algorithm_names[i] + ' '
        else:
            name_prefix = ''
        ax1.plot(thresholds, accuracies[i],
                 label=name_prefix + 'accuracy',
                 color=colors[(i * 2) % num_colors],
                 linewidth=3)
        ax1.plot(thresholds, dataset_kepts[i],
                 label=name_prefix + 'data coverage',
                 color=colors[(i * 2 + 1) % num_colors],
                 linewidth=3)

    ax1.legend(frameon=True, loc=3)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def confidence_fitlering_data_vs_acc_plot(
        accuracies,
        dataset_kepts,
        model_names=None,
        dotted=False,
        decimal_digits=0,
        y_label='accuracy',
        title=None,
        filename=None
):
    """Plot accuracy against data coverage, one line per model.

    :param accuracies: one sequence of y values per model.
    :param dataset_kepts: one sequence of x values (fraction of data kept)
        per model; the x axis spans from 0 to the largest of them.
    :param model_names: optional names used as legend entries.
    :param dotted: a single flag, or a list/tuple of per-model flags,
        selecting a dotted instead of a solid line.
    :param decimal_digits: decimals of the percentage labels on the x axis.
    :param y_label: label of the y axis.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(accuracies) == len(dataset_kepts)

    sns.set_style('whitegrid')
    palette = plt.get_cmap('tab10').colors

    coverage_limit = max([max(kept) for kept in dataset_kepts])

    minor_x = np.linspace(0.0, coverage_limit, num=21)
    major_x = np.linspace(0.0, coverage_limit, num=11)
    major_x_labels = []
    for tick in major_x:
        major_x_labels.append(
            '{value:3.{decimal_digits}f}%'.format(
                decimal_digits=decimal_digits,
                value=tick * 100
            )
        )
    minor_y = np.linspace(0.0, 1.0, num=21)
    major_y = np.linspace(0.0, 1.0, num=11)

    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    ax.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax.grid(which=which, alpha=alpha)

    # x axis: data coverage shown as percentages
    ax.set_xticks(major_x)
    ax.set_xticks(minor_x, minor=True)
    ax.set_xticklabels(major_x_labels)
    ax.set_xlim(0, coverage_limit)
    ax.set_xlabel('data coverage')

    # y axis: fixed to [0, 1]
    ax.set_ylim(0, 1)
    ax.set_yticks(major_y)
    ax.set_yticks(minor_y, minor=True)
    ax.set_ylabel(y_label)

    per_model_style = isinstance(dotted, (list, tuple))
    for idx, accuracy in enumerate(accuracies):
        # A per-model flag is used when available, otherwise `dotted`
        # itself decides the line style.
        if per_model_style and idx < len(dotted):
            use_dots = dotted[idx]
        else:
            use_dots = dotted

        if model_names is not None and idx < len(model_names):
            line_label = model_names[idx] + ' '
        else:
            line_label = ''

        line_style = ':' if use_dots else '-'
        ax.plot(dataset_kepts[idx], accuracy, label=line_label,
                color=palette[idx], linewidth=3, linestyle=line_style)

    ax.legend(frameon=True, loc=3)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def confidence_fitlering_data_vs_acc_multiline_plot(
        accuracies,
        dataset_kepts,
        models_names,
        title=None,
        filename=None
):
    """Plot many thin accuracy vs. data coverage lines in a single color.

    :param accuracies: one sequence of y values per line.
    :param dataset_kepts: one sequence of x values (fraction of data kept)
        per line; the x axis spans from 0 to the largest of them.
    :param models_names: labels passed to the legend, which is built from
        a single line handle.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(accuracies) == len(dataset_kepts)

    sns.set_style('whitegrid')
    line_color = plt.get_cmap('tab20').colors[0]

    coverage_limit = max([max(kept) for kept in dataset_kepts])

    minor_x = np.linspace(0.0, coverage_limit, num=21)
    major_x = np.linspace(0.0, coverage_limit, num=11)
    major_x_labels = ['{:3.0f}%'.format(tick * 100) for tick in major_x]
    minor_y = np.linspace(0.0, 1.0, num=21)
    major_y = np.linspace(0.0, 1.0, num=11)

    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    ax.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax.grid(which=which, alpha=alpha)

    # x axis: data coverage shown as percentages
    ax.set_xticks(major_x)
    ax.set_xticks(minor_x, minor=True)
    ax.set_xticklabels(major_x_labels)
    ax.set_xlim(0, coverage_limit)
    ax.set_xlabel('data coverage')

    # y axis: fixed to [0, 1]
    ax.set_ylim(0, 1)
    ax.set_yticks(major_y)
    ax.set_yticks(minor_y, minor=True)
    ax.set_ylabel('accuracy')

    # All lines are translucent so that overlapping ones appear darker
    for idx, accuracy in enumerate(accuracies):
        ax.plot(dataset_kepts[idx], accuracy, color=line_color,
                linewidth=1.0, alpha=0.35)

    # The legend uses an opaque proxy line instead of the plotted ones
    proxy_line = Line2D([0], [0], linewidth=1.0, color=line_color)
    ax.legend([proxy_line], models_names)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def confidence_fitlering_3d_plot(
        thresholds_1,
        thresholds_2,
        accuracies,
        dataset_kepts,
        threshold_output_feature_names=None,
        title=None,
        filename=None
):
    """Draw accuracy and data coverage as two 3D surfaces over a threshold grid.

    :param thresholds_1: threshold values along the x axis.
    :param thresholds_2: threshold values along the y axis (same length as
        `thresholds_1`).
    :param accuracies: z values of the 'accuracy' surface.
        NOTE(review): presumably a 2D array matching the meshgrid built
        from the two threshold sequences -- confirm against callers.
    :param dataset_kepts: z values of the 'data coverage' surface (same
        length as `accuracies`).
    :param threshold_output_feature_names: names used in the x and y axis
        labels. Elements 0 and 1 are read unconditionally, so the default
        None raises a TypeError.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(accuracies) == len(dataset_kepts)
    assert len(thresholds_1) == len(thresholds_2)

    # From here on both names hold the 2D coordinate grids
    thresholds_1, thresholds_2 = np.meshgrid(thresholds_1, thresholds_2)

    colors = plt.get_cmap('tab10').colors
    sns.set_style('white')

    z_ticks_minor = np.linspace(0.0, 1.0, num=21)
    z_ticks_major = np.linspace(0.0, 1.0, num=11)
    z_ticks_major_labels = ['{:3.0f}%'.format(z * 100) for z in z_ticks_major]

    fig = plt.figure()
    # This assignment is overwritten on the next line.
    # NOTE(review): presumably it only exists to reference the Axes3D
    # import -- confirm before removing.
    ax = Axes3D
    ax = fig.add_subplot(111, projection='3d')

    if title is not None:
        ax.set_title(title)

    ax.grid(which='both')
    ax.grid(which='minor', alpha=0.5)
    ax.grid(which='major', alpha=0.75)

    ax.set_xlabel('{} probability'.format(threshold_output_feature_names[0]))
    ax.set_ylabel('{} probability'.format(threshold_output_feature_names[1]))

    ax.set_xlim(np.min(thresholds_1), np.max(thresholds_1))
    ax.set_ylim(np.min(thresholds_2), np.max(thresholds_2))
    ax.set_zlim(0, 1)
    ax.set_zticks(z_ticks_major)
    ax.set_zticklabels(z_ticks_major_labels)
    ax.set_zticks(z_ticks_minor, minor=True)

    # HORRIBLE HACK, IT'S THE ONLY WAY TO REMOVE PADDING
    # Axis._get_coord_info is replaced, for the whole process, by a wrapper
    # that pulls mins / maxs inwards by a quarter of the deltas. The
    # hasattr guard makes sure the patch is installed only once, so the
    # wrapper never ends up wrapping itself.
    from mpl_toolkits.mplot3d.axis3d import Axis
    if not hasattr(Axis, '_get_coord_info_old'):
        def _get_coord_info_new(self, renderer):
            mins, maxs, centers, deltas, tc, highs = self._get_coord_info_old(
                renderer)
            mins += deltas / 4
            maxs -= deltas / 4
            return mins, maxs, centers, deltas, tc, highs

        Axis._get_coord_info_old = Axis._get_coord_info
        Axis._get_coord_info = _get_coord_info_new
    # END OF HORRIBLE HACK

    surf_1 = ax.plot_surface(thresholds_1, thresholds_2, accuracies,
                             alpha=0.5,
                             label='accuracy',
                             cmap=plt.get_cmap('winter'),
                             edgecolor='none')
    surf_2 = ax.plot_surface(thresholds_1, thresholds_2, dataset_kepts,
                             alpha=0.5,
                             label='data coverage',
                             cmap=plt.get_cmap('autumn'),
                             edgecolor='none')

    # Shallow copies of the surfaces, recolored with flat colors, are used
    # as legend handles instead of the colormapped surfaces themselves.
    handle_1 = copy.copy(surf_1)
    handle_2 = copy.copy(surf_2)

    handle_1.set_color(colors[0])
    handle_2.set_color(colors[1])

    # Mirror the private 3d color attributes onto their 2d counterparts.
    # NOTE(review): presumably the legend reads the 2d attributes, which
    # are not populated on these copies -- verify against the installed
    # matplotlib version.
    handle_1._edgecolors2d = handle_1._edgecolors3d
    handle_2._edgecolors2d = handle_2._edgecolors3d

    handle_1._facecolors2d = handle_1._facecolors3d
    handle_2._facecolors2d = handle_2._facecolors3d

    ax.legend(frameon=True, loc=3, handles=[handle_1, handle_2])

    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def threshold_vs_metric_plot(
        thresholds,
        scores,
        algorithm_names=None,
        title=None,
        filename=None
):
    """Plot a metric as a function of the confidence threshold.

    :param thresholds: threshold values used as x coordinates.
    :param scores: one sequence of metric values per algorithm, aligned
        with `thresholds`.
    :param algorithm_names: optional names used as legend entries.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')
    palette = plt.get_cmap('tab10').colors

    fig, ax1 = plt.subplots()

    if title is not None:
        ax1.set_title(title)

    ax1.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax1.grid(which=which, alpha=alpha)

    # major ticks on every other threshold, minor ticks on all of them
    major_ticks = [
        threshold
        for position, threshold in enumerate(thresholds)
        if position % 2 == 0
    ]
    ax1.set_xticks(major_ticks)
    ax1.set_xticks(thresholds, minor=True)
    ax1.set_xlabel('confidence threshold')

    for idx, values in enumerate(scores):
        if algorithm_names is not None and idx < len(algorithm_names):
            line_label = algorithm_names[idx] + ' '
        else:
            line_label = ''
        ax1.plot(thresholds, values, label=line_label, color=palette[idx],
                 linewidth=3, marker='o')

    ax1.legend(frameon=True)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def roc_curves(
        fpr_tprs,
        algorithm_names=None,
        title=None,
        graded_color=False,
        filename=None
):
    """Plot ROC curves together with the diagonal reference line.

    :param fpr_tprs: one entry per algorithm; element 0 holds the false
        positive rates and element 1 the true positive rates.
    :param algorithm_names: optional names used as legend entries.
    :param title: optional plot title.
    :param graded_color: if True curve colors are sampled from the 'RdYlGn'
        colormap according to the position of the curve, otherwise they
        are taken from the 'tab10' palette.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')

    palette = plt.get_cmap('tab10').colors
    gradient = plt.get_cmap('RdYlGn')

    minor_y = np.linspace(0.0, 1.0, num=21)
    major_y = np.linspace(0.0, 1.0, num=11)

    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    ax.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax.grid(which=which, alpha=alpha)

    ax.set_xlim(0, 1)
    ax.set_xlabel('False positive rate')

    ax.set_ylim(0, 1)
    ax.set_yticks(major_y)
    ax.set_yticks(minor_y, minor=True)
    ax.set_ylabel('True positive rate')

    # dashed diagonal reference line
    plt.plot([0, 1], [0, 1], color='black', linewidth=3, linestyle='--')

    num_curves = len(fpr_tprs)
    for idx in range(num_curves):
        if algorithm_names is not None and idx < len(algorithm_names):
            curve_label = algorithm_names[idx] + ' '
        else:
            curve_label = ''

        if graded_color:
            curve_color = gradient(idx / num_curves)
        else:
            curve_color = palette[idx]

        rates = fpr_tprs[idx]
        ax.plot(rates[0], rates[1], label=curve_label, color=curve_color,
                linewidth=3)

    ax.legend(frameon=True)
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def calibration_plot(
        fraction_positives,
        mean_predicted_values,
        algorithm_names=None,
        filename=None
):
    """Plot calibration (reliability) curves of one or more algorithms.

    For every algorithm a polynomial regression (order at most 3) of the
    observed fraction of positives on the mean predicted value is drawn
    next to the 'Perfectly calibrated' diagonal.

    :param fraction_positives: one sequence of observed fractions of
        positives per algorithm.
    :param mean_predicted_values: one sequence of mean predicted values per
        algorithm, aligned with `fraction_positives`.
    :param algorithm_names: optional names used as legend entries.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(fraction_positives) == len(mean_predicted_values)

    sns.set_style('whitegrid')

    colors = plt.get_cmap('tab10').colors

    num_algorithms = len(fraction_positives)

    plt.figure(figsize=(9, 9))
    plt.grid(which='both')
    plt.grid(which='minor', alpha=0.5)
    plt.grid(which='major', alpha=0.75)

    plt.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')

    for i in range(num_algorithms):
        assert len(mean_predicted_values[i]) == len(fraction_positives[i])
        # The polynomial order cannot exceed the number of points minus one
        order = min(3, len(mean_predicted_values[i]) - 1)

        if algorithm_names is not None and i < len(algorithm_names):
            label = algorithm_names[i]
        else:
            label = ''

        # x and y are passed by keyword: recent seaborn releases only
        # accept the data vectors as keyword arguments, and the keyword
        # form is also understood by older releases.
        sns.regplot(x=mean_predicted_values[i], y=fraction_positives[i],
                    order=order, x_estimator=np.mean,
                    # wrap around instead of failing when colors run out
                    color=colors[i % len(colors)],
                    marker='o', scatter_kws={'s': 40},
                    label=label)

    ticks = np.linspace(0.0, 1.0, num=11)
    plt.xlim([-0.05, 1.05])
    plt.xticks(ticks)
    plt.xlabel('Predicted probability')
    plt.ylabel('Observed probability')
    plt.ylim([-0.05, 1.05])
    plt.yticks(ticks)
    plt.legend(loc='lower right')
    plt.title('Calibration (reliability curve)')

    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def brier_plot(
        brier_scores,
        algorithm_names=None,
        title=None,
        filename=None
):
    """Plot the per-class Brier score of each algorithm as a line.

    :param brier_scores: 2D array-like, indexed as [class, algorithm].
    :param algorithm_names: optional names used as legend entries.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')

    # Draw on a dedicated figure: relying on the current pyplot figure
    # would paint over a previous plot that was saved but never closed.
    fig, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    colors = plt.get_cmap('tab10').colors

    ax.grid(which='both')
    ax.grid(which='minor', alpha=0.5)
    ax.grid(which='major', alpha=0.75)
    ax.set_xlabel('class')
    ax.set_ylabel('brier')

    # np.asarray lets callers pass nested lists as well as arrays
    brier_scores = np.asarray(brier_scores)
    for i in range(brier_scores.shape[1]):
        if algorithm_names is not None and i < len(algorithm_names):
            label = algorithm_names[i] + ' '
        else:
            label = ''
        ax.plot(brier_scores[:, i], label=label,
                # wrap around instead of failing when colors run out
                color=colors[i % len(colors)], linewidth=3)

    ax.legend()
    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def predictions_distribution_plot(
        probabilities,
        algorithm_names=None,
        filename=None
):
    """Plot overlapping histograms of predicted probabilities.

    :param probabilities: one sequence of values per algorithm; each is
        binned into 41 bins over the range [0, 1].
    :param algorithm_names: optional names used as legend entries.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')
    palette = plt.get_cmap('tab10').colors

    plt.figure(figsize=(9, 9))
    plt.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        plt.grid(which=which, alpha=alpha)

    for idx in range(len(probabilities)):
        if algorithm_names is not None and idx < len(algorithm_names):
            hist_label = algorithm_names[idx]
        else:
            hist_label = ''
        # translucent filled steps keep overlapping histograms readable
        plt.hist(probabilities[idx], range=(0, 1), bins=41,
                 color=palette[idx], label=hist_label,
                 histtype='stepfilled', alpha=0.5, lw=2)

    plt.xlabel('Mean predicted value')
    plt.xlim([0, 1])
    plt.xticks(np.linspace(0.0, 1.0, num=21))
    plt.ylabel('Count')
    plt.legend(loc='upper center', ncol=2)

    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def confusion_matrix_plot(
        confusion_matrix,
        labels=None,
        output_feature_name=None,
        filename=None
):
    """Render a confusion matrix as a color-coded image with a colorbar.

    :param confusion_matrix: 2D array-like drawn with `matshow`; the axes
        are labelled 'Actual ...' (y) and 'Predicted ...' (x).
    :param labels: optional class labels (any sequence) for the ticks of
        both axes. When omitted, integer ticks with their default numeric
        labels are shown.
    :param output_feature_name: name inserted into the axis labels.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    # NOTE: this changes the global matplotlib configuration, figures
    # created later on are affected too.
    mpl.rcParams.update({'figure.autolayout': True})
    fig, ax = plt.subplots()

    ax.invert_yaxis()
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')

    cax = ax.matshow(confusion_matrix, cmap='viridis')

    if labels is not None:
        labels = list(labels)
        num_rows, num_cols = np.shape(confusion_matrix)[:2]
        # Fix the tick positions first and only then assign the labels, so
        # that label k is guaranteed to sit on cell k. Positions never
        # exceed the matrix size, hence the view limits are left alone.
        x_labels = labels[:num_cols]
        y_labels = labels[:num_rows]
        ax.set_xticks(np.arange(len(x_labels)))
        ax.set_yticks(np.arange(len(y_labels)))
        ax.set_xticklabels(x_labels, rotation=45, ha='left')
        ax.set_yticklabels(y_labels)
    else:
        # One tick per cell, with the default numeric labels
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    ax.grid(False)
    ax.tick_params(axis='both', which='both', length=0)
    fig.colorbar(cax, ax=ax, extend='max')
    ax.set_xlabel('Predicted {}'.format(output_feature_name))
    ax.set_ylabel('Actual {}'.format(output_feature_name))

    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def double_axis_line_plot(
        y1_sorted,
        y2,
        y1_name,
        y2_name,
        labels=None,
        title=None,
        filename=None
):
    """Plot two series sharing the x axis, each with its own y axis.

    :param y1_sorted: values of the first series, drawn against the left
        y axis; the x axis is labelled as sorted by `y1_name`.
    :param y2: values of the second series, drawn against the right y axis.
    :param y1_name: name of the first series (left axis label).
    :param y2_name: name of the second series (right axis label).
    :param labels: optional x tick labels, placed on positions
        0 .. len(labels) - 1.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')

    colors = plt.get_cmap('tab10').colors

    fig, ax1 = plt.subplots()

    if title is not None:
        ax1.set_title(title)

    ax1.set_xlabel('class (sorted by {})'.format(y1_name))
    ax1.set_xlim(0, len(y1_sorted) - 1)
    if labels is not None:
        # Tick positions have to be fixed before the labels are assigned,
        # otherwise labels (and their rotation) are attached to whatever
        # ticks happen to exist at that moment.
        ax1.set_xticks(np.arange(len(labels)))
        ax1.set_xticklabels(labels, rotation=45, ha='right')

    # Axis label and ticks are drawn in the color of the matching line
    ax1.set_ylabel(y1_name, color=colors[1])
    ax1.tick_params('y', colors=colors[1])
    ax1.set_ylim(min(y1_sorted), max(y1_sorted))

    ax2 = ax1.twinx()
    ax2.set_ylabel(y2_name, color=colors[0])
    ax2.tick_params('y', colors=colors[0])
    ax2.set_ylim(min(y2), max(y2))

    ax1.plot(y1_sorted, label=y1_name, color=colors[1],
             linewidth=4)
    ax2.plot(y2, label=y2_name, color=colors[0],
             linewidth=3)

    fig.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()


def plot_matrix(
        matrix,
        cmap='hot',
        filename=None
):
    """Display a matrix as an image using `plt.matshow`.

    :param matrix: 2D array-like to display.
    :param cmap: colormap passed to `plt.matshow`.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    plt.matshow(matrix, cmap=cmap)
    current_figure = plt.gcf()
    ludwig.contrib.contrib_command("visualize_figure", current_figure)
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def plot_distributions(
        distributions,
        labels=None,
        title=None,
        filename=None
):
    """Plot several distributions over classes as translucent lines.

    :param distributions: one sequence of values per distribution.
    :param labels: optional legend names; distributions without a name are
        labelled 'Distribution <index>'.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')
    palette = plt.get_cmap('tab10').colors

    fig, ax1 = plt.subplots()

    if title is not None:
        ax1.set_title(title)

    ax1.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax1.grid(which=which, alpha=alpha)

    ax1.set_xlabel('class')
    ax1.set_ylabel('p')
    ax1.tick_params('y')

    for idx, values in enumerate(distributions):
        if labels is not None and idx < len(labels):
            line_label = labels[idx]
        else:
            line_label = 'Distribution {}'.format(idx)
        ax1.plot(values, color=palette[idx], alpha=0.6, label=line_label)

    ax1.legend(frameon=True)
    fig.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def plot_distributions_difference(
        distribution,
        labels=None,
        title=None,
        filename=None
):
    """Plot a single distribution (e.g. a difference) over classes.

    :param distribution: sequence of values to plot.
    :param labels: accepted but not used by the implementation.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    sns.set_style('whitegrid')
    line_color = plt.get_cmap('tab10').colors[0]

    fig, ax1 = plt.subplots()

    if title is not None:
        ax1.set_title(title)

    ax1.grid(which='both')
    for which, alpha in (('minor', 0.5), ('major', 0.75)):
        ax1.grid(which=which, alpha=alpha)

    ax1.set_xlabel('class')
    ax1.set_ylabel('p')
    ax1.tick_params('y')

    ax1.plot(distribution, color=line_color)

    fig.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if not filename:
        plt.show()
    else:
        plt.savefig(filename)


def bar_plot(
        xs,
        ys,
        decimals=4,
        labels=None,
        title=None,
        filename=None
):
    """Draw a horizontal bar chart with the value printed on every bar.

    :param xs: identifiers of the bars; used as y tick labels when
        `labels` is not given.
    :param ys: bar lengths (array-like, same length as `xs`).
    :param decimals: number of decimals of the printed values.
    :param labels: optional y tick labels replacing `xs`.
    :param title: optional plot title.
    :param filename: when truthy the figure is saved to this path,
        otherwise it is shown.
    """
    assert len(xs) == len(ys)
    assert len(xs) > 0

    sns.set_style('whitegrid')

    fig, ax = plt.subplots()

    ax.grid(which='both')
    ax.grid(which='minor', alpha=0.5)
    ax.grid(which='major', alpha=0.75)

    if title is not None:
        ax.set_title(title)

    colors = plt.get_cmap('tab10').colors

    ax.invert_yaxis()  # labels read top-to-bottom

    # np.asarray lets callers pass plain lists as well as arrays
    ys = np.asarray(ys)
    maximum = ys.max()
    ticks = np.arange(len(xs))
    ax.set_yticks(ticks)
    if labels is None:
        ax.set_yticklabels(xs)
    else:
        ax.set_yticklabels(labels)

    ax.barh(ticks, ys, color=colors[0], align='center')

    value_format = '{:.' + str(decimals) + 'f}'
    for i, v in enumerate(ys):
        # Short bars get the value printed to their right, longer ones get
        # it right-aligned inside the bar.
        if v < maximum * (0.025 * decimals + 0.1):
            x = v + maximum * 0.01
            horizontal_alignment = 'left'
        else:
            x = v - maximum * 0.01
            horizontal_alignment = 'right'
        txt = ax.text(x, ticks[i], value_format.format(v),
                      color='white',
                      fontweight='bold', verticalalignment='center',
                      horizontalalignment=horizontal_alignment)
        txt.set_path_effects(
            [PathEffects.withStroke(linewidth=3, foreground='black')])

    plt.tight_layout()
    ludwig.contrib.contrib_command("visualize_figure", plt.gcf())
    if filename:
        plt.savefig(filename)
    else:
        plt.show()