Python seaborn.swarmplot() Examples

The following are 10 code examples of seaborn.swarmplot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module seaborn , or try the search function .
Example #1
Source File: old_test_plotting.py    From DABEST-python with BSD 3-Clause Clear License 6 votes vote down vote up
def test_swarmspan():
    print('Testing swarmspans')

    base_mean = np.random.randint(10, 101)
    seed, ptp, df = create_dummy_dataset(base_mean=base_mean)

    print('\nSeed = {}; base mean = {}'.format(seed, base_mean))

    for c in df.columns[1:-1]:
        print('{}...'.format(c))

        f1, swarmplt = plt.subplots(1)
        sns.swarmplot(data=df[[df.columns[0], c]], ax=swarmplt)
        sns_yspans = []
        for coll in swarmplt.collections:
            sns_yspans.append(get_swarm_yspans(coll))

        f2, b = _api.plot(data=df, idx=(df.columns[0], c))
        dabest_yspans = []
        for coll in f2.axes[0].collections:
            dabest_yspans.append(get_swarm_yspans(coll))

        for j, span in enumerate(sns_yspans):
            assert span == pytest.approx(dabest_yspans[j]) 
Example #2
Source File: analysis.py    From dl-eeg-review with MIT License 5 votes vote down vote up
def plot_number_subjects_by_domain(df, save_cfg=cfg.saving_config):
    """Plot number of subjects in studies by domain.
    """
    # Split values into separate rows and remove invalid values
    col = 'Data - subjects'
    nb_subj_df = ut.split_column_with_multiple_entries(
        df, col, ref_col='Main domain')
    nb_subj_df = nb_subj_df.loc[~nb_subj_df[col].isin(['n/m', 'tbd'])]
    nb_subj_df[col] = nb_subj_df[col].astype(int)
    nb_subj_df = nb_subj_df.loc[nb_subj_df[col] > 0, :]

    nb_subj_df['Main domain'] = nb_subj_df['Main domain'].apply(
        ut.wrap_text, max_char=13)

    fig, ax = plt.subplots(
        figsize=(save_cfg['text_width'] / 3 * 2, save_cfg['text_height'] / 3))
    ax.set(xscale='log', yscale='linear')
    sns.swarmplot(
        y='Main domain', x=col, data=nb_subj_df, 
        ax=ax, size=3, order=nb_subj_df.groupby(['Main domain'])[
            col].median().sort_values().index)
    ax.set_xlabel('Number of subjects')
    ax.set_ylabel('')
    
    logger.info('Stats on number of subjects per model: {}'.format(
        nb_subj_df[col].describe()))

    plt.tight_layout()

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'nb_subject_per_domain')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax 
Example #3
Source File: imputations.py    From autoimpute with MIT License 5 votes vote down vote up
def plot_imp_swarm(d, mi, imp_col, palette=None,
                   title="Imputation Swarm", **plot_kwgs):
    """Create the swarm plot for multiply imputed data.

    Args:
        d (list): dataset returned from multiple imputation.
        mi (MultipleImputer): multiple imputer used to generate d.
        imp_col (str): column to plot. Should be a column with imputations.
        title (str, Optional): title of plot. Default is "Imputation Swarm".
        palette (list, tuple, Optional): colors for the imps and observed.
            Default is None. if None, colors default to ["r","c"].
        **plot_kwgs: keyword arguments used by sns.set.

    Returns:
        sns.distplot: swarmplot for imputed data

    Raises:
        ValueError: see _validate_data method.
    """

    # set plot type, validate, and define names necessary
    _default_plot_args(**plot_kwgs)
    _validate_data(d, mi, imp_col)
    datasets_merged = _melt_df(d, mi, imp_col)
    if palette is None:
        palette = ["r", "c"]

    # swarmplot example
    sns.swarmplot(
        x="imp_num", y=imp_col, hue="imputed", palette=palette,
        data=datasets_merged, hue_order=["yes", "no"]
    ).set(xlabel="Imputation Number", title=title) 
Example #4
Source File: plot_kmer_evenness.py    From EdwardsLab with MIT License 5 votes vote down vote up
def plot_swarm_evenness(df, output, verbose=False):
    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting swarmed evenness{bcolors.ENDC}\n")
    sns.violinplot(data=df, x='kmer', y='Evenness')
    sns.swarmplot(data=df, x='kmer', y='Evenness')
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.swarm.evenness.png")
    plt.clf() 
Example #5
Source File: plot_kmer_evenness.py    From EdwardsLab with MIT License 5 votes vote down vote up
def plot_shannon(df, output, verbose=False):
    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting swarmed shannon{bcolors.ENDC}\n")
    sns.violinplot(data=df, x='kmer', y='Shannon')
    sns.swarmplot(data=df, x='kmer', y='Shannon')
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.shannon.png")
    plt.clf() 
Example #6
Source File: ageingwip.py    From jira-agile-metrics with MIT License 5 votes vote down vote up
def write(self):
        output_file = self.settings['ageing_wip_chart']
        if not output_file:
            logger.debug("No output file specified for ageing WIP chart")
            return

        chart_data = self.get_result()

        if len(chart_data.index) == 0:
            logger.warning("Unable to draw ageing WIP chart with zero completed items")
            return

        fig, ax = plt.subplots()
        
        if self.settings['ageing_wip_chart_title']:
            ax.set_title(self.settings['ageing_wip_chart_title'])

        sns.swarmplot(x='status', y='age', order=chart_data.columns[4:], data=chart_data, ax=ax)

        ax.set_xlabel("Status")
        ax.set_ylabel("Age (days)")

        ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90)

        _, top = ax.get_ylim()
        ax.set_ylim(0, top)

        set_chart_style()

        # Write file
        logger.info("Writing ageing WIP chart to %s", output_file)
        fig.savefig(output_file, bbox_inches='tight', dpi=300)
        plt.close(fig) 
Example #7
Source File: charting.py    From jira-metrics-extract with MIT License 4 votes vote down vote up
def ageing_wip_chart(cycle_data, start_column, end_column, done_column=None, now=None, title=None, ax=None):
    if len(cycle_data.index) == 0:
        raise UnchartableData("Cannot draw ageing WIP chart with no data")

    if ax is None:
        fig, ax = plt.subplots()
    
    if title is not None:
        ax.set_title(title)

    if now is None:
        now = pd.Timestamp.now()

    if done_column is None:
        done_column = cycle_data.columns[-1]

    today = now.date()

    # remove items that are done
    cycle_data = cycle_data[pd.isnull(cycle_data[done_column])]
    # Check that we still have some data to proceed with.
    if len(cycle_data.index) == 0:
        raise UnchartableData("Cannot draw ageing WIP chart with no data - All items done!")

    cycle_data = pd.concat((
        cycle_data[['key', 'summary']],
        cycle_data.ix[:, start_column:end_column]
    ), axis=1)

    def extract_status(row):
        last_valid = row.last_valid_index()
        if last_valid is None:
            return np.NaN
        return last_valid

    def extract_age(row):
        started = row[start_column]
        if pd.isnull(started):
            return np.NaN
        return (today - started.date()).days

    wip_data = cycle_data[['key', 'summary']].copy()
    wip_data['status'] = cycle_data.apply(extract_status, axis=1)
    wip_data['age'] = cycle_data.apply(extract_age, axis=1)

    wip_data.dropna(how='any', inplace=True)

    sns.swarmplot(x='status', y='age', order=cycle_data.columns[2:], data=wip_data, ax=ax)

    ax.set_xlabel("Status")
    ax.set_ylabel("Age (days)")

    ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90)

    bottom, top = ax.get_ylim()
    ax.set_ylim(0, top)

    return ax 
Example #8
Source File: analysis.py    From dl-eeg-review with MIT License 4 votes vote down vote up
def _plot_results_accuracy_per_domain(results_df, diff_df, save_cfg):
    """Make scatterplot + boxplot to show accuracy difference by domain.
    """
    fig, axes = plt.subplots(
        nrows=2, ncols=1, sharex=True, 
        figsize=(save_cfg['text_width'], save_cfg['text_height'] / 3), 
        gridspec_kw = {'height_ratios':[5, 1]})

    results_df['Main domain'] = results_df['Main domain'].apply(
        ut.wrap_text, max_char=20)

    sns.catplot(y='Main domain', x='acc_diff', s=3, jitter=True, 
                data=results_df, ax=axes[0])
    axes[0].set_xlabel('')
    axes[0].set_ylabel('')
    axes[0].axvline(0, c='k', alpha=0.2)

    sns.boxplot(x='acc_diff', data=diff_df, ax=axes[1])
    sns.swarmplot(x='acc_diff', data=diff_df, color="0", size=2, ax=axes[1])
    axes[1].axvline(0, c='k', alpha=0.2)
    axes[1].set_xlabel('Accuracy difference')

    fig.subplots_adjust(wspace=0, hspace=0.02)
    plt.tight_layout()

    logger.info('Number of studies included in the accuracy improvement analysis: {}'.format(
        results_df.shape[0]))
    median = diff_df['acc_diff'].median()
    iqr = diff_df['acc_diff'].quantile(.75) - diff_df['acc_diff'].quantile(.25)
    logger.info('Median gain in accuracy: {:.6f}'.format(median))
    logger.info('Interquartile range of the gain in accuracy: {:.6f}'.format(iqr))
    best_improvement = diff_df.nlargest(3, 'acc_diff')
    logger.info('Best improvement in accuracy: {}, in {}'.format(
        best_improvement['acc_diff'].values[0], 
        best_improvement['Citation'].values[0]))
    logger.info('Second best improvement in accuracy: {}, in {}'.format(
        best_improvement['acc_diff'].values[1], 
        best_improvement['Citation'].values[1]))
    logger.info('Third best improvement in accuracy: {}, in {}'.format(
        best_improvement['acc_diff'].values[2], 
        best_improvement['Citation'].values[2]))

    if save_cfg is not None:
        savename = 'reported_accuracy_per_domain'
        fname = os.path.join(save_cfg['savepath'], savename)
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return axes 
Example #9
Source File: plots.py    From AlphaPy with Apache License 2.0 4 votes vote down vote up
def plot_swarm(df, x, y, hue, tag='eda', directory=None):
    r"""Display a Swarm Plot.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.swarmplot.html

    """

    logger.info("Generating Swarm Plot")

    # Generate the swarm plot

    swarm_plot = sns.swarmplot(x=x, y=y, hue=hue, data=df)
    swarm_fig = swarm_plot.get_figure()

    # Save the plot
    write_plot('seaborn', swarm_fig, 'swarm_plot', tag, directory)


#
# Time Series Plots
#


#
# Function plot_time_series
# 
Example #10
Source File: plotting.py    From fmridenoise with Apache License 2.0 4 votes vote down vote up
def motion_plot(group_conf_summary):
    # Plot style setup
    plt.style.use('seaborn-white')
    plt.rcParams['font.family'] = 'Helvetica'
    colour = ["#fe6863", "#00a074"]
    palette = sns.set_palette(colour)

    small = 15
    plt.rc('font', size=small)  # controls default text sizes
    plt.rc('axes', titlesize=small)  # fontsize of the axes title
    plt.rc('axes', linewidth=2.2)
    plt.rc('axes', labelsize=small)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=small)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=small)  # fontsize of the tick labels
    plt.rc('legend', fontsize=small)  # legend fontsize
    plt.rc('lines', linewidth=2.2, color='gray')
    # ------------------------------------------

    motion_dict = {'Mean FD': ['mean_fd', 0.2],
                   'Max FD': ['max_fd', 5],
                   'Percent of outlier dataframes (%)': ['perc_spikes', 20]}

    fig, axes = plt.subplots(1, 3, figsize=(16, 7))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    i = 0
    for key, value in motion_dict.items():
        plt.figure(figsize=(4, 6))
        p = sns.swarmplot(y=value[0],
                          x="task",
                          hue="include",
                          data=group_conf_summary,
                          alpha=0.8,
                          s=10,
                          color=palette,
                          ax=axes[i]
                          )

        p = sns.boxplot(y=value[0],
                        x="task",
                        data=group_conf_summary,
                        showcaps=False,
                        boxprops={'facecolor': 'None'},
                        showfliers=False, ax=axes[i])

        p.title.set_text(f"Threshold = {value[1]}")
        p.axhline(value[1], ls='--', color="#fe6863")
        p.set(xlabel='')
        p.set(ylabel=key)
        p.get_legend().set_visible(False)
        p.tick_params(axis='both', which='both', length=6, width=2.2)
        i += 1
    fig.suptitle(f"Excluding high motion subjects", va="top")

    return fig