Python seaborn.violinplot() Examples

The following are 30 code examples of seaborn.violinplot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: timeplots.py    From NanoPlot with GNU General Public License v3.0 7 votes vote down vote up
def quality_over_time(dfs, path, figformat, title, plot_settings={}):
    """Draw a violin plot of the "quals" column per "timebin" and save it.

    :param dfs: data handed to seaborn; needs "timebin" and "quals" columns
    :param path: prefix prepended to the output file name
    :param figformat: file extension, also passed to ``Plot.save`` as format
    :param title: plot title; when falsy the title of the Plot object is used
    :param plot_settings: extra keyword arguments forwarded to ``sns.set``
    :return: the Plot object with its ``fig`` attribute set
    """
    violin = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                  title="Violin plot of quality over time")
    sns.set(style="white", **plot_settings)
    violin_kwargs = dict(x="timebin", y="quals", data=dfs,
                         inner=None, cut=0, linewidth=0)
    ax = sns.violinplot(**violin_kwargs)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or violin.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    violin.fig = ax.get_figure()
    violin.save(format=figformat)
    # Close every open pyplot figure after saving.
    plt.close("all")
    return violin
Example #2
Source File: timeplots.py    From NanoPlot with GNU General Public License v3.0 7 votes vote down vote up
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings={}):
    """Draw a violin plot of lengths / duration per "timebin" and save it.

    Rows whose "duration" is 0 are left out to avoid dividing by zero.
    When ``dfs`` has no "timebin" column, one is added in place using
    ``add_time_bins``.

    :param dfs: DataFrame with "duration" and "lengths" columns
    :param path: prefix prepended to the output file name
    :param figformat: file extension, also passed to ``Plot.save`` as format
    :param title: plot title; when falsy the title of the Plot object is used
    :param plot_settings: extra keyword arguments forwarded to ``sns.set``
    :return: the Plot object with its ``fig`` attribute set
    """
    speed_plot = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                      title="Violin plot of sequencing speed over time")
    sns.set(style="white", **plot_settings)
    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)
    nonzero = dfs['duration'] != 0
    timebins = dfs.loc[nonzero, "timebin"]
    speed = dfs.loc[nonzero, "lengths"] / dfs.loc[nonzero, "duration"]
    ax = sns.violinplot(x=timebins, y=speed, inner=None, cut=0, linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or speed_plot.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    speed_plot.fig = ax.get_figure()
    speed_plot.save(format=figformat)
    # Close every open pyplot figure after saving.
    plt.close("all")
    return speed_plot
Example #3
Source File: brute_force_plotter.py    From brute-force-plotter with MIT License 7 votes vote down vote up
def bar_box_violin_dot_plots(data, category_col, numeric_col, axes, file_name=None):
    """Draw bar, strip, box and violin plots of one numeric column by category.

    Args:
        data: DataFrame holding both columns.
        category_col: name of the column used on the x axis.
        numeric_col: name of the column used on the y axis.
        axes: indexable of at least four matplotlib axes; index 0 gets the
            bar plot, 1 the strip plot, 2 the box plot and 3 the violin plot.
        file_name: unused by this function.
    """
    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally.
    sns.barplot(x=category_col, y=numeric_col, data=data, ax=axes[0])
    sns.boxplot(
        x=category_col,
        y=numeric_col,
        # The box plot is drawn from rows with a non-null numeric value only.
        data=data[data[numeric_col].notnull()],
        ax=axes[2],
    )
    # The former ``kind="violin"`` argument is dropped: it is not a violinplot
    # parameter (older seaborn ignored it, newer releases forward unknown
    # keywords to matplotlib, which rejects it).
    sns.violinplot(
        x=category_col,
        y=numeric_col,
        data=data,
        inner="quartile",
        scale="count",
        split=True,
        ax=axes[3],
    )
    sns.stripplot(x=category_col, y=numeric_col, data=data, jitter=True, ax=axes[1])
    sns.despine(left=True)
Example #4
Source File: plotlib.py    From mCaller with MIT License 6 votes vote down vote up
def plot_change_by_pos(diffs_by_context,plottype='box'):
    """Plot per-position differences grouped by label and save them as a PDF.

    ``diffs_by_context`` is indexed as ``[label][context]`` and yields
    sequences; every element except the last of each sequence is recorded with
    its 1-based position. ``plottype`` selects 'box' or 'violin'; any other
    value draws no distribution. The figure is always written to
    'change_by_position_box.pdf'.
    """
    plt.figure(figsize=(6,4))
    records = {'position':[],'base':[],'diff':[]}
    for lab in diffs_by_context:
        per_context = diffs_by_context[lab]
        for context in per_context:
            for entry in per_context[context]:
                # The last element of each entry is skipped.
                for pos,diff in enumerate(entry[:-1], start=1):
                    records['position'].append(pos)
                    records['base'].append(lab)
                    records['diff'].append(diff)
    dPos = pd.DataFrame(records)
    plotters = {'box': sns.boxplot, 'violin': sns.violinplot}
    draw = plotters.get(plottype)
    if draw is not None:
        draw(x="position", y="diff", hue="base", data=dPos,
             palette=[cols[base],cols[methbase]])
    sns.despine(trim=False)
    plt.xlabel('Adenine Position in 6-mer')
    plt.ylabel('Measured - Expected Current (pA)')
    plt.ylim([-20,20])
    plt.legend(title='',loc='upper center', bbox_to_anchor=(0.5, 1.05),
          ncol=3, fancybox=True)
    plt.savefig('change_by_position_box.pdf',transparent=True,dpi=500, bbox_inches='tight')
Example #5
Source File: figure.py    From DrugEx with MIT License 6 votes vote down vote up
def fig9():
    """Violin plots comparing physicochemical properties (Figure 9).

    Top panel: molecules generated by DrugEx with the pre-trained model as
    exploration network. Bottom panel: molecules generated by DrugEx with the
    fine-tuned model as exploration network. The figure is written to
    'Figure_9.tif'.
    """
    theme = dict(style="white", palette="pastel", color_codes=True)
    violin_opts = dict(x='Property', y='Number', hue='Set', linewidth=1, bw=0.8)
    fig = plt.figure(figsize=(12, 12))

    top = fig.add_subplot(211)
    sns.set(**theme)
    frame = properties(mol_paths + real_path, labels + real_label, is_active=True)
    sns.violinplot(data=frame, **violin_opts)
    sns.despine(left=True)
    top.set(ylim=[0.0, 15.0], xlabel='Structural Properties')

    bottom = fig.add_subplot(212)
    frame = properties(mol_paths1 + real_path, labels + real_label, is_active=True)
    sns.set(**theme)
    sns.violinplot(data=frame, **violin_opts)
    sns.despine(left=True)
    bottom.set(ylim=[0.0, 15.0], xlabel='Structural Properties')

    fig.tight_layout()
    fig.savefig('Figure_9.tif', dpi=300)
Example #6
Source File: figure.py    From DrugEx with MIT License 6 votes vote down vote up
def fig6():
    """Split violin plots comparing physicochemical properties (Figure 6).

    A (left): molecules generated by the pre-trained model vs. the ZINC set.
    B (right): molecules generated by the fine-tuned model vs. the A2AR set.
    The figure is written to 'Figure_6.tif'.
    """
    theme = dict(style="white", palette="pastel", color_codes=True)
    violin_opts = dict(x='Property', y='Number', hue='Set',
                       linewidth=1, split=True, bw=1)
    y_range = [0.0, 18.0]
    x_label = 'Structural Properties'

    plt.figure(figsize=(12, 6))

    # Panel A
    plt.subplot(121)
    sns.set(**theme)
    frame = properties(['data/ZINC_B.txt', 'mol_p.txt'], ['ZINC Dataset', 'Pre-trained Model'])
    sns.violinplot(data=frame, **violin_opts)
    sns.despine(left=True)
    plt.ylim(y_range)
    plt.xlabel(x_label)

    # Panel B
    plt.subplot(122)
    frame = properties(['data/CHEMBL251.txt', 'mol_ex.txt'], ['A2AR Dataset', 'Fine-tuned Model'])
    sns.set(**theme)
    sns.violinplot(data=frame, **violin_opts)
    sns.despine(left=True)
    plt.ylim(y_range)
    plt.xlabel(x_label)

    plt.tight_layout()
    plt.savefig('Figure_6.tif', dpi=300)
Example #7
Source File: structureViewer.py    From mmtf-pyspark with Apache License 2.0 6 votes vote down vote up
def metal_distance_widget(df_concat):
    '''Build an interactive violin plot of metal-element distances.

    A dropdown lists the unique values of the "Metal" column; the selected
    metal drives a violin plot of "Distance" per "Element".

    Parameters
    ----------
    df_concat : Dataframe
       dataframe of metal-element distances with "Metal", "Element" and
       "Distance" columns

    Returns
    -------
    The result of the ``interact`` call wiring the dropdown to the plot.
    '''
    def metal_distance_violinplot(metal):
        # Work on a copy so the caller's dataframe keeps its element names.
        selected = df_concat[df_concat["Metal"] == metal].copy()
        selected['Element'] = selected['Element'].apply(lambda x: metal+"-"+x)

        fig, ax = plt.subplots()
        fig.set_size_inches(15,6)
        violin_ax = sns.violinplot(x="Element", y="Distance", palette="muted",
                                   data=selected, ax=ax)
        violin_ax.set(xlabel="Metal Interactions", ylabel="Distance",
                      title=f"{metal} to Elements Distances Violin Plot")

    metal_choices = df_concat['Metal'].unique().tolist()
    m_widget = Dropdown(options = metal_choices, description = "Metals")

    return interact(metal_distance_violinplot, metal=m_widget)
Example #8
Source File: pltfile.py    From CatLearn with GNU General Public License v3.0 6 votes vote down vote up
def featselect_featvar_plot(p_error_select, number_feat):
    """Show prediction error against ``number_feat`` as violin and point plots.

    Parameters
    ----------
    p_error_select : list
        Values drawn on the y axis (labelled 'Prediction error').
    number_feat : list
        Values drawn on the x axis (labelled 'Data size').
    """
    plt.figure().add_subplot(111)
    xy = dict(x=number_feat, y=p_error_select)
    sns.violinplot(scale="count", **xy)
    sns.pointplot(**xy)
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    plt.show()
Example #9
Source File: pltfile.py    From CatLearn with GNU General Public License v3.0 6 votes vote down vote up
def violinplot(set_size, p_error, subplot, i):
    """Draw one panel of a 2x2 learning-curve figure as a violin plot.

    Parameters
    ----------
    set_size : list
       Size of sub-set of data/features which the model is based on.
    p_error : list
       The prediction error for plain vanilla ridge.
    subplot : int
        Panel being produced; appended to "22" to form the subplot code.
        The figure is shown once panel 4 has been drawn.
    i : int
       Which iteration in the feature selection; used in the panel title.
    """
    plt.figure(1)
    panel = plt.subplot(int(f"22{subplot}"))
    panel.set_title('Feature size ' + str(i), loc='left')
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    xy = dict(x=set_size, y=p_error)
    sns.violinplot(scale="count", **xy)
    sns.pointplot(ci=100, capsize=.2, **xy)
    last_panel = subplot == 4
    if last_panel:
        plt.show()
Example #10
Source File: umbilical.py    From geosketch with MIT License 6 votes vote down vote up
def violin_jitter(X, genes, gene, labels, focus, background=None,
                  xlabels=None):
    """Save a violin + strip plot of one gene for a focus vs. background group.

    Args:
        X: sparse matrix indexed as ``X[:, column]`` and supporting
            ``.toarray()``; one column per entry of ``genes``.
        genes: iterable of gene names giving the column order of ``X``.
        gene: name of the gene to plot; must be present in ``genes``.
        labels: array compared element-wise against ``focus``/``background``
            to select rows.
        focus: label value of the focus group.
        background: label value of the background group; when None every row
            whose label differs from ``focus`` is used.
        xlabels: two tick labels in plotting order (focus first, background
            second). Defaults to ``['Focus', 'Background']``.

    The figure is written to '<NAMESPACE>_violin_<gene>.png'.
    """
    gidx = list(genes).index(gene)

    focus_idx = focus == labels
    if background is None:
        background_idx = focus != labels
    else:
        background_idx = background == labels

    if xlabels is None:
        # The data is plotted as [focus, background], so the default labels
        # must follow that order (they were previously swapped).
        xlabels = [ 'Focus', 'Background' ]

    x_gene = X[:, gidx].toarray().flatten()
    x_focus = x_gene[focus_idx]
    x_background = x_gene[background_idx]

    plt.figure()
    sns.violinplot(data=[ x_focus, x_background ], scale='width', cut=0)
    sns.stripplot(data=[ x_focus, x_background ], jitter=True, color='black', size=1)
    plt.xticks([0, 1], xlabels)
    plt.savefig('{}_violin_{}.png'.format(NAMESPACE, gene))
Example #11
Source File: mouse_brain_astrocyte.py    From geosketch with MIT License 5 votes vote down vote up
def astro_oligo_violin(X, genes, gene, labels, name):
    """Save a violin + strip plot of one gene for three label groups.

    Rows of ``X`` (converted with ``.toarray()``) are split by whether
    ``labels`` equals 'astro', 'oligo' or 'both'; the column is the position
    of ``gene`` in ``genes``. The figure is written to
    '<name>_violin_<gene>.svg'.
    """
    dense = X.toarray()
    column = list(genes).index(gene)

    groups = [dense[labels == key, column] for key in ('astro', 'oligo', 'both')]

    plt.figure()
    sns.violinplot(data=groups, scale='width', cut=0)
    sns.stripplot(data=groups, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], ['Astrocytes', 'Oligodendrocytes', 'Both'])
    plt.savefig('{}_violin_{}.svg'.format(name, gene))
Example #12
Source File: timeplots.py    From NanoPlot with GNU General Public License v3.0 5 votes vote down vote up
def length_over_time(dfs, path, figformat, title, log_length=False, plot_settings={}):
    """Draw a violin plot of read lengths per "timebin" and save it.

    :param dfs: DataFrame with "timebin" and "lengths" (and "log_lengths"
        when ``log_length`` is set); an optional boolean "length_filter"
        column selects the rows that are plotted
    :param path: prefix prepended to the output file name
    :param figformat: file extension, also passed to ``Plot.save`` as format
    :param title: plot title; when falsy the title of the Plot object is used
    :param log_length: plot the "log_lengths" column and label the y axis
        with powers of ten
    :param plot_settings: extra keyword arguments forwarded to ``sns.set``
    :return: the Plot object with its ``fig`` attribute set
    """
    if log_length:
        file_stub = "TimeLogLengthViolinPlot."
        default_title = "Violin plot of log read lengths over time"
        length_column = "log_lengths"
    else:
        file_stub = "TimeLengthViolinPlot."
        default_title = "Violin plot of read lengths over time"
        length_column = "lengths"
    time_length = Plot(path=path + file_stub + figformat, title=default_title)
    sns.set(style="white", **plot_settings)

    # "length_filter" is produced by NanoPlot filtering of too long reads
    plotted = dfs[dfs["length_filter"]] if "length_filter" in dfs else dfs

    ax = sns.violinplot(x="timebin", y=length_column, data=plotted,
                        inner=None, cut=0, linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Read length",
           title=title or time_length.title)
    if log_length:
        # Powers of ten up to ten times the longest read (unfiltered data).
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**i for i in range(10) if not 10**i > upper]
        ax.set(yticks=np.log10(ticks),
               yticklabels=ticks)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_length.fig = ax.get_figure()
    time_length.save(format=figformat)
    plt.close("all")
    return time_length
Example #13
Source File: plots.py    From cdlib with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def plot_com_stat(com_clusters, com_fitness):
    """
    Plot the distribution of a property among all communities for a clustering, or a list of clusterings (violin-plots)

    :param com_clusters: list of clusterings to compare, or a single clustering
    :param com_fitness: the fitness/community property to use; called as ``com_fitness(graph, clustering, summary=False)``
    :return: the violin-plots (the matplotlib axes returned by seaborn)

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> violinplot = viz.plot_com_stat([coms,coms2],evaluation.size)

    """
    if isinstance(com_clusters, cdlib.classes.clustering.Clustering):
        com_clusters = [com_clusters]

    allVals = []
    allNames = []
    for c in com_clusters:
        prop = com_fitness(c.graph, c, summary=False)
        allVals.extend(prop)
        # One name per value so both lists stay aligned.
        allNames.extend([c.get_description()] * len(prop))

    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally.
    ax = sns.violinplot(x=allNames, y=allVals, cut=0, saturation=0.5, palette="Set3")
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)

    plt.ylabel("%s" % com_fitness.__name__)
    plt.xlabel("Algorithm")
    plt.tight_layout()

    return ax
Example #14
Source File: mouse_brain_subcluster.py    From geosketch with MIT License 5 votes vote down vote up
def astro_oligo_violin(X, genes, gene, labels, name):
    """Save a violin + strip plot of one gene for three label groups.

    Rows of ``X`` (converted with ``.toarray()``) are split by whether
    ``labels`` equals 'astro', 'oligo' or 'both'; the column is the position
    of ``gene`` in ``genes``. The figure is written to
    '<name>_violin_<gene>.svg'.
    """
    dense = X.toarray()
    column = list(genes).index(gene)

    groups = [dense[labels == key, column] for key in ('astro', 'oligo', 'both')]

    plt.figure()
    sns.violinplot(data=groups, scale='width', cut=0)
    sns.stripplot(data=groups, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], ['Astrocytes', 'Oligodendrocytes', 'Both'])
    plt.savefig('{}_violin_{}.svg'.format(name, gene))
Example #15
Source File: plot_kmer_evenness.py    From EdwardsLab with MIT License 5 votes vote down vote up
def plot_shannon(df, output, verbose=False):
    """Save a violin plot with overlaid swarm plot of 'Shannon' per 'kmer'.

    The figure is written to '<output>.shannon.png' and the current figure is
    cleared afterwards. With ``verbose`` a progress line goes to stderr.
    """
    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting swarmed shannon{bcolors.ENDC}\n")
    columns = dict(data=df, x='kmer', y='Shannon')
    for draw in (sns.violinplot, sns.swarmplot):
        draw(**columns)
    sns.despine(offset=10, trim=True)
    target = f"{output}.shannon.png"
    plt.savefig(target)
    plt.clf()
Example #16
Source File: plot_kmer_evenness.py    From EdwardsLab with MIT License 5 votes vote down vote up
def plot_swarm_evenness(df, output, verbose=False):
    """Save a violin plot with overlaid swarm plot of 'Evenness' per 'kmer'.

    The figure is written to '<output>.swarm.evenness.png' and the current
    figure is cleared afterwards. With ``verbose`` a progress line goes to
    stderr.
    """
    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting swarmed evenness{bcolors.ENDC}\n")
    columns = dict(data=df, x='kmer', y='Evenness')
    for draw in (sns.violinplot, sns.swarmplot):
        draw(**columns)
    sns.despine(offset=10, trim=True)
    target = f"{output}.swarm.evenness.png"
    plt.savefig(target)
    plt.clf()
Example #17
Source File: plot_kmer_evenness.py    From EdwardsLab with MIT License 5 votes vote down vote up
def plot_evenness(df, output, verbose=False):
    """Save a violin plot of 'Evenness' per 'kmer'.

    The figure is written to '<output>.evenness.png' and the current figure
    is cleared afterwards. With ``verbose`` a progress line goes to stderr.
    """
    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting evenness{bcolors.ENDC}\n")
    columns = dict(data=df, x='kmer', y='Evenness')
    sns.violinplot(**columns)
    sns.despine(offset=10, trim=True)
    target = f"{output}.evenness.png"
    plt.savefig(target)
    plt.clf()
Example #18
Source File: brute_force_plotter.py    From brute-force-plotter with MIT License 5 votes vote down vote up
def histogram_violin_plots(data, axes, file_name=None):
    """Draw a histogram and a violin plot of one set of values.

    Args:
        data: the values to plot.
        axes: indexable of at least two matplotlib axes; index 0 gets the
            histogram, index 1 the violin plot.
        file_name: unused by this function.
    """
    # histogram
    sns.distplot(data, ax=axes[0], axlabel="")
    # Pass the values explicitly as ``x``: the first positional parameter of
    # violinplot was ``x`` in older seaborn but is ``data`` in newer releases,
    # so a positional argument changes meaning between versions.
    sns.violinplot(x=data, ax=axes[1], inner="quartile", scale="count")
    sns.despine(left=True)
Example #19
Source File: plot.py    From speedml with MIT License 5 votes vote down vote up
def ordinal(self, y):
        """
        Show a violin plot of feature ``y`` against the target feature.

        Intended for ordinal (categorical numeric) features, to spot outliers
        across the associated target values. ``Base.data_n()`` is called
        first; the plot reads ``Base.train_n`` and ``Base.target``.
        """
        Base.data_n()
        plt.figure(figsize=(8,4))
        target = Base.target
        sns.violinplot(x=target, y=y, data=Base.train_n)
        label_size = 12
        plt.xlabel(Base.target, fontsize=label_size)
        plt.ylabel(y, fontsize=label_size)
        plt.show()
Example #20
Source File: adjacency.py    From nltools with MIT License 5 votes vote down vote up
def plot_label_distance(self, labels=None, ax=None):
        ''' Create a violin plot indicating within and between label distance

            Args:
                labels (np.array):  numpy array of labels to plot; defaults
                                    to a copy of self.labels
                ax: matplotlib axis handed to seaborn (None lets seaborn
                    pick the current axis)

            Returns:
                f: violin plot handles

            Raises:
                ValueError: if this is not a single adjacency matrix, or if
                            labels does not match the distance matrix length

        '''

        if not self.is_single_matrix:
            raise ValueError('This function only works on single adjacency '
                             'matrices.')

        distance = pd.DataFrame(self.squareform())

        if labels is None:
            labels = np.array(deepcopy(self.labels))
        else:
            if len(labels) != distance.shape[0]:
                raise ValueError('Labels must be same length as distance matrix')
            # Element-wise comparisons below need an array, not a list.
            labels = np.asarray(labels)

        columns = ['Distance', 'Group', 'Type']
        frames = []
        for i in np.unique(labels):
            in_group = labels == i
            out_group = labels != i
            upper = np.triu_indices(int(in_group.sum()), k=1)
            within = distance.loc[in_group, in_group].values[upper]
            # NOTE(review): as in the original code, the "between" values are
            # taken from the block of rows/columns NOT in the group, indexed
            # with the in-group triangle size - confirm this is intended.
            between = distance.loc[out_group, out_group].values[upper]
            frames.append(pd.DataFrame(
                {'Distance': within, 'Group': i, 'Type': 'Within'},
                columns=columns))
            frames.append(pd.DataFrame(
                {'Distance': between, 'Group': i, 'Type': 'Between'},
                columns=columns))

        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        frames = [frame for frame in frames if not frame.empty]
        if frames:
            out = pd.concat(frames, ignore_index=True)
        else:
            out = pd.DataFrame(columns=columns)

        f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out, split=True, inner='quartile',
                           palette={"Within": "lightskyblue", "Between": "red"}, ax=ax)
        f.set_ylabel('Average Distance')
        f.set_title('Average Group Distance')
        # Return the handle promised by the docstring (was a bare return).
        return f
Example #21
Source File: plots.py    From Comparative-Annotation-Toolkit with Apache License 2.0 5 votes vote down vote up
def horizontal_violin_plot(data, ordered_genomes, title, xlabel, pdf, hue=None, x=None, y=None, xlim=None):
    """Draw a horizontal violin plot ordered by genome and close the PDF page.

    ``data``, ``x``, ``y`` and ``hue`` go straight to seaborn;
    ``ordered_genomes`` sets both the category order and the palette. ``xlim``
    is applied only when given. The page is closed through
    ``multipage_close(pdf, tight_layout=False)``.
    """
    fig, ax = plt.subplots()
    violin_opts = dict(
        x=x, y=y, hue=hue,
        order=ordered_genomes,
        palette=choose_palette(ordered_genomes),
        saturation=boxplot_saturation,
        orient='h', cut=0, scale='count',
    )
    sns.violinplot(data=data, ax=ax, **violin_opts)
    fig.suptitle(title)
    ax.set_xlabel(xlabel)
    if xlim is not None:
        ax.set_xlim(xlim)
    multipage_close(pdf, tight_layout=False)
Example #22
Source File: analyze_hostguest.py    From SAMPL6 with MIT License 5 votes vote down vote up
def generate_molecules_plot(self):
        """Save a horizontal violin plot of the error column per system id.

        Reads ``self.data`` (columns 'system_id' and the ddG error column) and
        writes the figure to MOLECULE_CORRELATION_PLOT_PATH inside
        ``self.output_directory_path``. The figure height grows with the
        number of unique system ids.
        """
        # Correlation plot by molecules.
        plt.close('all')
        n_rows = len(self.data.system_id.unique())
        fig, ax = plt.subplots(figsize=(6, 0.4*n_rows))
        # Raw string: the column name contains backslashes, which are invalid
        # escape sequences in a regular string literal. The value is unchanged.
        sns.violinplot(y='system_id', x=r'$\Delta\Delta$G error (calc - expt)  [kcal/mol]',
                       data=self.data, linewidth=1.0, inner='point', cut=0, ax=ax)
        plt.tight_layout(pad=0.2)
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.MOLECULE_CORRELATION_PLOT_PATH))
Example #23
Source File: sct_compute_hausdorff_distance.py    From spinalcordtoolbox with MIT License 5 votes vote down vote up
def show_results(self):
        """Plot both distance distributions as split violins per slice.

        Distances from ``self.dist1_distribution`` (image 1) and
        ``self.dist2_distribution`` (image 2) are multiplied by
        ``self.dim_pix``. For ``self.dim_im == 2`` everything goes to slice 0;
        for ``self.dim_im == 3`` one entry per index of ``self.distances`` is
        used. The figure is written to 'violin_plot.png'.
        """
        import seaborn as sns
        import matplotlib.pyplot as plt
        import pandas as pd
        # The former plt.hold(True) call is dropped: pyplot.hold was removed
        # from matplotlib and raises AttributeError; holding is always on.
        sns.set(style="whitegrid", palette="pastel", color_codes=True)
        plt.figure(figsize=(35, 20))

        data_dist = {"distances": [], "image": [], "slice": []}

        def add_distribution(distribution, image_id, slice_id):
            # Append one flat run of rows for a single image/slice pair.
            data_dist["distances"].extend(dist * self.dim_pix for dist in distribution)
            data_dist["image"].extend(len(distribution) * [image_id])
            data_dist["slice"].extend(len(distribution) * [slice_id])

        if self.dim_im == 2:
            add_distribution(self.dist1_distribution, 1, 0)
            add_distribution(self.dist2_distribution, 2, 0)
        elif self.dim_im == 3:
            # The number of slices is taken from self.distances.
            for i in range(len(self.distances)):
                add_distribution(self.dist1_distribution[i], 1, i)
                add_distribution(self.dist2_distribution[i], 2, i)

        data_dist = pd.DataFrame(data_dist)
        sns.violinplot(x="slice", y="distances", hue="image", data=data_dist, split=True, inner="point", cut=0)
        plt.savefig('violin_plot.png')
        # plt.show()


# ---------------------------------------------------------------------------------------------------------------------- 
Example #24
Source File: typeI_analysis.py    From SAMPL6 with MIT License 5 votes vote down vote up
def generate_molecules_plot(self):
        """Save a horizontal violin plot of the pKa error per pKa ID.

        Reads ``self.data`` sorted by 'pKa ID' and writes the figure to
        PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a bool; the string "True" is rejected by
        # current pandas.
        data_ordered_by_pKa_ID = self.data.sort_values(["pKa ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string: "\D" is an invalid escape sequence in a regular literal.
        sns.violinplot(y='pKa ID', x=r'$\Delta$pKa error (calc - exp)', data=data_ordered_by_pKa_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR))
Example #25
Source File: typeIII_analysis.py    From SAMPL6 with MIT License 5 votes vote down vote up
def generate_molecules_plot(self):
        """Save a horizontal violin plot of the pKa error per pKa ID.

        Reads ``self.data`` sorted by 'pKa ID' and writes the figure to
        PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a bool; the string "True" is rejected by
        # current pandas.
        data_ordered_by_pKa_ID = self.data.sort_values(["pKa ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string: "\D" is an invalid escape sequence in a regular literal.
        sns.violinplot(y='pKa ID', x=r'$\Delta$pKa error (calc - exp)', data=data_ordered_by_pKa_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR))
Example #26
Source File: logP_analysis.py    From SAMPL6 with MIT License 5 votes vote down vote up
def generate_molecules_plot(self):
        """Save a horizontal violin plot of the logP error per molecule ID.

        Reads ``self.data`` sorted by 'Molecule ID' and writes the figure to
        LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a bool; the string "True" is rejected by
        # current pandas.
        data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string: "\D" is an invalid escape sequence in a regular literal.
        sns.violinplot(y='Molecule ID', x=r'$\Delta$logP error (calc - exp)', data=data_ordered_by_mol_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR))
Example #27
Source File: utils.py    From dl-eeg-review with MIT License 5 votes vote down vote up
def run_kruskal(df, condition_col, value_col='acc_diff', min_n_obs=6, 
                plot=False):
    """Run Kruskal-Wallis analysis of variance test.

    Groups with fewer than ``min_n_obs`` observations are dropped. The test
    runs only when more than two groups remain; otherwise a message is
    printed and both statistic and p-value are NaN.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run the
            test on.
        min_n_obs (int): minimum number of observations in each sample in order
            to run the test.
        plot (bool): also draw a violin plot of the retained groups.

    Returns:
        (dict): keys 'test' ('kruskal'), 'pvalue', 'stat' and 'fig' (the
            matplotlib figure, or None when ``plot`` is False).
    """
    samples = []
    for _, values in df.groupby(condition_col)[value_col]:
        if len(values) >= min_n_obs:
            samples.append(values)

    if len(samples) > 2:
        stat, p = kruskal(*samples)
    else:
        stat, p = np.nan, np.nan
        print('Not enough samples with more than {} observations.'.format(min_n_obs))

    fig = None
    if plot:
        counts = df[condition_col].value_counts()
        kept = counts.index[counts >= min_n_obs].tolist()
        fig, ax = plt.subplots()
        subset = df[df[condition_col].isin(kept)]
        sns.violinplot(data=subset, x=condition_col, y=value_col, ax=ax)
        ax.set_title('Kruskal-Wallis for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'kruskal', 'pvalue': p, 'stat': stat, 'fig': fig}
Example #28
Source File: logP_analysis.py    From SAMPL6 with MIT License 5 votes vote down vote up
def generate_molecules_plot(self):
        """Save a horizontal violin plot of the logP error per molecule ID.

        Reads ``self.data`` sorted by 'Molecule ID' and writes the figure to
        LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a bool; the string "True" is rejected by
        # current pandas.
        data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string: "\D" is an invalid escape sequence in a regular literal.
        sns.violinplot(y='Molecule ID', x=r'$\Delta$logP error (calc - exp)', data=data_ordered_by_mol_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR))
Example #29
Source File: plotting.py    From QUANTAXIS with MIT License 4 votes vote down vote up
def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None, ax=None):
    """Draw violin plots of returns (in bps) per factor quantile and period.

    ``return_by_q`` is copied, scaled by DECIMAL_TO_BPS and reshaped to long
    form with its columns named "forward_periods"; the plot uses the
    "factor_quantile" column on the x axis. ``ylim_percentiles`` optionally
    gives the (lower, upper) percentiles used as y limits. When ``ax`` is
    None a new 18x6 figure is created. Returns the axes drawn on.
    """
    return_by_q = return_by_q.copy()

    if ylim_percentiles is None:
        ymin = ymax = None
    else:
        raw_values = return_by_q.values
        ymin = np.nanpercentile(raw_values, ylim_percentiles[0]) * DECIMAL_TO_BPS
        ymax = np.nanpercentile(raw_values, ylim_percentiles[1]) * DECIMAL_TO_BPS

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(18, 6))

    # Wide frame (one column per period) -> long frame with a "return" column.
    scaled = return_by_q.multiply(DECIMAL_TO_BPS)
    scaled.columns = scaled.columns.set_names("forward_periods")
    stacked = scaled.stack()
    stacked.name = "return"
    long_form = stacked.reset_index()

    violin_opts = dict(x="factor_quantile", hue="forward_periods", y="return",
                       orient="v", cut=0, inner="quartile")
    sns.violinplot(data=long_form, ax=ax, **violin_opts)
    ax.set(
        xlabel="",
        ylabel="Return (bps)",
        title="Period Wise Return By Factor Quantile",
        ylim=(ymin, ymax),
    )

    # Zero-return reference line.
    ax.axhline(0.0, linestyle="-", color="black", lw=0.7, alpha=0.6)

    return ax
Example #30
Source File: utils.py    From dl-eeg-review with MIT License 4 votes vote down vote up
def run_mannwhitneyu(df, condition_col, conditions, value_col='acc_diff',
                     min_n_obs=10, plot=False):
    """Run Mann-Whitney rank-sum test.

    The test runs only when both samples hold at least ``min_n_obs``
    observations; otherwise a message is printed and both statistic and
    p-value are NaN.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.
        conditions (list): list of two strings containing the values of the
            condition to compare.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run the
            test on.
        min_n_obs (int): minimum number of observations in each sample in order
            to run the test.
        plot (bool): also draw a violin plot of the two conditions.

    Returns:
        (dict): keys 'test' ('mannwhitneyu'), 'pvalue', 'stat' and 'fig' (the
            matplotlib figure, or None when ``plot`` is False).
    """
    assert len(conditions) == 2, '`conditions` must be of length 2, got {}'.format(
        len(conditions))
    first = df.loc[df[condition_col] == conditions[0], value_col]
    second = df.loc[df[condition_col] == conditions[1], value_col]

    too_few = len(first) < min_n_obs or len(second) < min_n_obs
    if too_few:
        stat, p = np.nan, np.nan
        print('Not enough observations in each sample ({} and {}).'.format(
            len(first), len(second)))
    else:
        stat, p = mannwhitneyu(first, second)

    fig = None
    if plot:
        fig, ax = plt.subplots()
        subset = df[df[condition_col].isin(conditions)]
        sns.violinplot(data=subset, x=condition_col, y=value_col, ax=ax)
        ax.set_title('Mann-Whitney for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'mannwhitneyu', 'pvalue': p, 'stat': stat, 'fig': fig}