Python seaborn.countplot() Examples

The following are 15 code examples of seaborn.countplot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: analysis.py    From dl-eeg-review with MIT License 7 votes vote down vote up
def plot_country(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the country of the first author's affiliation.

    Bars are ordered by decreasing frequency, and the three most frequent
    countries are written to the module logger.

    Args:
        df: data indexed by column name; must contain a 'Country' column.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'. It is also forwarded as keyword arguments to
            ``fig.savefig``. If None, the figure uses matplotlib's default
            size and nothing is written to disk.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    # One frequency table drives both the bar order and the top-3 log line.
    country_counts = df['Country'].value_counts()

    # Only dereference save_cfg when it is provided; otherwise the
    # `save_cfg is None` (no saving) path below could never be reached.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 3,
                   save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(x=df['Country'], ax=ax, order=country_counts.index)
    ax.set_ylabel('Number of papers')
    ax.set_xlabel('')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    plt.tight_layout()

    top3 = country_counts.index[:3]
    logger.info('Top 3 countries of first author affiliation: {}'.format(top3.values))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'country')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax
Example #2
Source File: analysis.py    From dl-eeg-review with MIT License 6 votes vote down vote up
def plot_model_comparison(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the types of baseline models used.

    Also logs, as a percentage of ``df.shape[0]``, how often the categories
    'Traditional pipeline', 'DL', 'DL & Trad.' and 'None' occur in the
    'Baseline model type' column. Those four labels are looked up directly
    in the value counts.

    Args:
        df: data indexed by column name; must contain a
            'Baseline model type' column and expose ``shape``.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'. It is also forwarded as keyword arguments to
            ``fig.savefig``. If None, the figure uses matplotlib's default
            size and nothing is written to disk.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    # Only dereference save_cfg when it is provided; otherwise the
    # `save_cfg is None` (no saving) path below could never be reached.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 2,
                   save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(y=df['Baseline model type'].dropna(axis=0), ax=ax)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')
    plt.tight_layout()

    # Percentages are relative to all rows, including rows with no value.
    model_prcts = df['Baseline model type'].value_counts() / df.shape[0] * 100
    logger.info('% of studies that used at least one traditional baseline: {}'.format(
        model_prcts['Traditional pipeline'] + model_prcts['DL & Trad.']))
    logger.info('% of studies that used at least one deep learning baseline: {}'.format(
        model_prcts['DL'] + model_prcts['DL & Trad.']))
    logger.info('% of studies that did not report baseline comparisons: {}'.format(
        model_prcts['None']))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'model_comparison')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax
Example #3
Source File: analysis.py    From dl-eeg-review with MIT License 6 votes vote down vote up
def plot_cross_validation(df, save_cfg=cfg.saving_config):
    """Plot bar graph of cross validation approaches.

    Note:
        Missing values in ``df['Cross validation (clean)']`` are replaced
        by 'N/M' directly on the object passed in, so the caller's data is
        modified.

    Args:
        df: data indexed by column name; must contain the
            'Cross validation (clean)' column. It is handed to
            ``ut.split_column_with_multiple_entries`` together with
            ``ref_col='Citation'``.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'. It is also forwarded as keyword arguments to
            ``fig.savefig``. If None, the figure uses matplotlib's default
            size and nothing is written to disk.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    col = 'Cross validation (clean)'
    df[col] = df[col].fillna('N/M')
    # Presumably yields one row per ';\n'-separated entry -- verify against
    # the helper's implementation.
    cv_df = ut.split_column_with_multiple_entries(
        df, col, ref_col='Citation', sep=';\n', lower=False)

    # Only dereference save_cfg when it is provided; otherwise the
    # `save_cfg is None` (no saving) path below could never be reached.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 2, save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(y=cv_df[col], order=cv_df[col].value_counts().index, ax=ax)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')

    plt.tight_layout()

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'cross_validation')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax
Example #4
Source File: eda.py    From AI_in_Medicine_Clinical_Imaging_Classification with MIT License 6 votes vote down vote up
def plot_classification_frequency(df, category, file_name, convert_labels = False):
    '''
    Plots the frequency at which labels occur

    INPUT
        df: Pandas DataFrame of the image name and labels
        category: name of the column in df whose values are counted
        file_name: path the plot image is saved to
        convert_labels: if true, the 'level' column of df is replaced (in
            place) by the result of change_labels(df, 'level') before
            plotting

    OUTPUT
        Image of plot, showing label frequency, written to file_name
    '''
    # Operate on the DataFrame that was passed in rather than on a
    # module-level `labels` global, which the function used to require.
    if convert_labels:
        df['level'] = change_labels(df, 'level')

    sns.set(style="whitegrid", color_codes=True)
    sns.countplot(x=category, data=df)
    plt.title('Retinopathy vs Frequency')
    plt.savefig(file_name)
Example #5
Source File: eda.py    From eyenet with MIT License 6 votes vote down vote up
def plot_classification_frequency(df, category, file_name, convert_labels = False):
    '''
    Plots the frequency at which labels occur

    INPUT
        df: Pandas DataFrame of the image name and labels
        category: name of the column in df whose values are counted
        file_name: path the plot image is saved to
        convert_labels: if true, the 'level' column of df is replaced (in
            place) by the result of change_labels(df, 'level') before
            plotting

    OUTPUT
        Image of plot, showing label frequency, written to file_name
    '''
    # Operate on the DataFrame that was passed in rather than on a
    # module-level `labels` global, which the function used to require.
    if convert_labels:
        df['level'] = change_labels(df, 'level')

    sns.set(style="whitegrid", color_codes=True)
    sns.countplot(x=category, data=df)
    plt.title('Retinopathy vs Frequency')
    plt.savefig(file_name)
Example #6
Source File: dataframe_explorer.py    From pandasgui with MIT License 6 votes vote down vote up
def update_plot(self):
    """Redraw the plot for the column currently selected in the picker.

    Columns whose dtype name is 'object', 'bool' or 'category' get a
    horizontal count plot limited to the ten most frequent values; every
    other column gets a distribution plot. Missing values are dropped
    first. The resulting figure is handed to ``self.figure_viewer``.
    """
    plt.ioff()
    column_name = self.picker.currentText()

    plt.figure()

    values = self.df[column_name].dropna()
    categorical_dtypes = ['object', 'bool', 'category']
    if self.df[column_name].dtype.name not in categorical_dtypes:
        histogram_style = {'color': 'grey', 'alpha': 1}
        ax = sns.distplot(values, color='black', hist_kws=histogram_style)
    else:
        # Keep only the ten most frequent values, most frequent first.
        top_values = values.value_counts().iloc[:10].index
        ax = sns.countplot(y=values, color='grey', order=top_values)

    self.figure_viewer.setFigure(ax.figure)


# Examples 
Example #7
Source File: analysis.py    From perses with MIT License 5 votes vote down vote up
def plot_chemical_trajectory(self, environment, filename):
    """
    Plot the trajectory through chemical space.

    Writes a one-page PDF with two panels: on the left, the index of the
    visited chemical state at each iteration; on the right, a count of how
    often each state index occurs in the trajectory.

    Parameters
    ----------
    environment : str
        the name of the environment for which the chemical space trajectory is desired
    filename : str
        destination of the PDF; passed directly to ``PdfPages``
    """
    chemical_state_trajectory = self.extract_state_trajectory(environment)

    visited_states = list(set(chemical_state_trajectory))

    # Map each state to its position once instead of calling
    # list.index() for every step of the trajectory (quadratic).
    state_to_index = {state: idx for idx, state in enumerate(visited_states)}
    state_trajectory = np.array(
        [state_to_index[state] for state in chemical_state_trajectory],
        dtype=float)

    with PdfPages(filename) as pdf:
        sns.set(font_scale=2)
        fig = plt.figure(figsize=(28, 12))
        plt.subplot2grid((1,2), (0,0))
        # x/y must be keywords: seaborn >= 0.12 only accepts `data`
        # positionally.
        ax = sns.scatterplot(x=np.arange(len(state_trajectory)), y=state_trajectory)
        plt.yticks(np.arange(len(visited_states)), visited_states)

        plt.title("Trajectory through chemical space in {}".format(environment))
        plt.xlabel("iteration")
        plt.ylabel("chemical state")
        plt.tight_layout()

        plt.subplot2grid((1,2), (0,1))
        ax = sns.countplot(y=state_trajectory)

        pdf.savefig(fig)
        plt.close()
Example #8
Source File: analysis.py    From dl-eeg-review with MIT License 5 votes vote down vote up
def plot_type_of_paper(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the type of each paper (journal, conference, etc.).

    Also logs the number of 'Journal', 'Conference' and 'Preprint' entries
    in the 'Type of paper' column, and the number of non-preprint rows whose
    'Preprint first' value is 'Yes'. The logged counts use the unmodified
    ``df``; only the plotted copy has supplements relabelled.

    Args:
        df: data indexed by column name; must contain 'Type of paper' and
            'Preprint first' columns. It is not modified.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'. It is also forwarded as keyword arguments to
            ``fig.savefig``. If None, the figure uses matplotlib's default
            size and nothing is written to disk.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    # Move supplements to journal paper category for the plot (a value of one is
    # not visible on a bar graph). Only the 'Type of paper' column is
    # relabelled; the other columns of those rows are left untouched.
    df_plot = df.copy()
    df_plot.loc[df['Type of paper'] == 'Supplement', 'Type of paper'] = 'Journal'

    # Only dereference save_cfg when it is provided; otherwise the
    # `save_cfg is None` (no saving) path below could never be reached.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4, save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(x=df_plot['Type of paper'], ax=ax)
    ax.set_xlabel('')
    ax.set_ylabel('Number of papers')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    plt.tight_layout()

    counts = df['Type of paper'].value_counts()
    logger.info('Number of journal papers: {}'.format(counts['Journal']))
    logger.info('Number of conference papers: {}'.format(counts['Conference']))
    logger.info('Number of preprints: {}'.format(counts['Preprint']))
    logger.info('Number of papers that were initially published as preprints: '
                '{}'.format(df[df['Type of paper'] != 'Preprint'][
                    'Preprint first'].value_counts()['Yes']))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'type_of_paper')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax
Example #9
Source File: analysis.py    From dl-eeg-review with MIT License 5 votes vote down vote up
def plot_hardware(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the hardware used in the study.

    Rows whose hardware entry is 'N/M' are left out. Bars are ordered by
    decreasing frequency and coloured by whether the device appears in a
    fixed list of low-cost devices.

    Args:
        df: data handed to ``ut.split_column_with_multiple_entries`` with
            the 'EEG Hardware' column and ``ref_col='Citation'``.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'. It is also forwarded as keyword arguments to
            ``fig.savefig``. If None, the figure uses matplotlib's default
            size and nothing is written to disk.

    Returns:
        The matplotlib Axes the bars were drawn on.
    """
    col = 'EEG Hardware'
    # Presumably yields one row per ','-separated entry -- verify against
    # the helper's implementation.
    hardware_df = ut.split_column_with_multiple_entries(
        df, col, ref_col='Citation', sep=',', lower=False)

    # Remove N/Ms because they make it hard to see anything. Take an
    # explicit copy so the column added below is written to an independent
    # frame rather than to a slice of the helper's result.
    hardware_df = hardware_df[hardware_df[col] != 'N/M'].copy()

    # Add low cost column
    low_cost_devices = ['EPOC (Emotiv)', 'OpenBCI (OpenBCI)', 'Muse (InteraXon)',
                        'Mindwave Mobile (Neurosky)', 'Mindset (NeuroSky)']
    hardware_df['Low-cost'] = hardware_df[col].isin(low_cost_devices)

    # Only dereference save_cfg when it is provided; otherwise the
    # `save_cfg is None` (no saving) path below could never be reached.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 2,
                   save_cfg['text_height'] / 5 * 2)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(hue=hardware_df['Low-cost'], y=hardware_df[col], ax=ax,
                  order=hardware_df[col].value_counts().index,
                  dodge=False)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')
    plt.tight_layout()

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'hardware')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax
Example #10
Source File: DataPrep.py    From Fake_News_Detection with MIT License 5 votes vote down vote up
def create_distribution(dataFile):
    """Draw a count plot of the 'Label' column of *dataFile*.

    Uses the 'hls' palette and returns whatever ``sb.countplot`` returns.
    """
    label_plot = sb.countplot(x='Label', data=dataFile, palette='hls')
    return label_plot
    

# By calling the function below, we can see that the training, test and validation data seem to be fairly evenly distributed between the classes
Example #11
Source File: visualize_traindata.py    From Supply-demand-forecasting with MIT License 5 votes vote down vote up
def weather_distribution(self):
    """Print summary statistics of the 'weather' column and draw its count plot.

    Loads the weather data from the training directory into ``self.gapdf``,
    prints ``describe()`` of its 'weather' column, then draws a count plot
    titled 'Countplot of Weather' on the current matplotlib figure.
    """
    data_dir = g_singletonDataFilePath.getTrainDir()
    self.gapdf = self.load_weatherdf(data_dir)
    # A parenthesised single-argument print is valid on both Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(self.gapdf['weather'].describe())

    sns.countplot(x="weather", data=self.gapdf, palette="Greens_d")
    plt.title('Countplot of Weather')
Example #12
Source File: utils.py    From Machine-Learning-with-Python with MIT License 5 votes vote down vote up
def plot_data(data):
    """Show one count plot per column of interest, one figure at a time.

    Plots, in order: 'y', 'job' (horizontal bars), 'marital', 'default',
    'housing', 'loan' and 'poutcome'. The number of missing values per
    column is printed after the first plot.
    """
    # count plot of the dependent variable
    sns.countplot(x='y', data=data, palette='hls')
    plt.show()

    # report missing values per column
    missing_per_column = data.isnull().sum()
    print(missing_per_column)

    # job distribution, drawn with horizontal bars
    sns.countplot(y='job', data=data)
    plt.show()

    # marital status, credit in default, housing loan, personal loan and
    # previous marketing campaign outcome, each in its own figure
    for column in ('marital', 'default', 'housing', 'loan', 'poutcome'):
        sns.countplot(x=column, data=data)
        plt.show()
Example #13
Source File: brute_force_plotter.py    From brute-force-plotter with MIT License 5 votes vote down vote up
def bar_plot(data, col, hue=None, file_name=None):
    """Draw a count plot of ``data[col]`` and label its bars.

    Args:
        data: DataFrame to plot; it is sorted by ``col`` before plotting.
        col: name of the column whose values are counted.
        hue: optional grouping variable forwarded to ``sns.countplot``.
        file_name: not used in this body -- presumably consumed by a
            decorator or by the caller; verify.
    """
    # `x` must be a keyword: seaborn >= 0.12 only accepts `data`
    # positionally.
    sns.countplot(x=col, hue=hue, data=data.sort_values(col))
    sns.despine(left=True)

    # Label the rectangles of every subplot in the current figure. Doing
    # this inside the loop avoids labelling only the last subplot and
    # avoids a NameError when the figure has no subplot at all.
    for child in plt.gcf().get_children():
        if not isinstance(child, matplotlib.axes.Subplot):
            continue
        rectangles = [
            patch
            for patch in child.get_children()
            if isinstance(patch, matplotlib.patches.Rectangle)
        ]
        autolabel(rectangles)
Example #14
Source File: plots.py    From compose with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def distribution(self, **kwargs):
    """Plots the label distribution.

    A count plot is drawn when the single target column is flagged as
    discrete in ``self._label_times.is_discrete``; otherwise a
    distribution plot with a KDE is drawn.

    Args:
        **kwargs: forwarded to the underlying seaborn plotting function.

    Returns:
        The Axes returned by seaborn, titled 'Label Distribution' with the
        y-axis labelled 'Count'.
    """
    self._label_times._assert_single_target()
    target_column = self._label_times.target_columns[0]
    dist = self._label_times[target_column]
    is_discrete = self._label_times.is_discrete[target_column]

    if is_discrete:
        # `x` must be a keyword: seaborn >= 0.12 only accepts `data`
        # positionally.
        ax = sns.countplot(x=dist, palette=COLOR, **kwargs)
    else:
        ax = sns.distplot(dist, kde=True, color=COLOR[1], **kwargs)

    ax.set_title('Label Distribution')
    ax.set_ylabel('Count')
    return ax
Example #15
Source File: coco_stats.py    From COCO-Assistant with MIT License 4 votes vote down vote up
def cat_count(anns, names, show_count=False, save=False):
    """Plot the number of instances per category for each annotation set.

    One subplot is drawn per entry of ``anns``, side by side, with bars
    ordered by decreasing count. The figure is always shown.

    Args:
        anns: sequence of annotation objects exposing ``anns`` (used to
            build a DataFrame) and ``cats`` (indexed by category id, each
            entry having a 'name').
        names: titles for the subplots, paired with ``anns`` by position.
        show_count: if true, write each bar's height above it.
        save: if true, save the figure as
            ``<cwd>/results/plots/cat_dist.png``, creating the directory
            (and any missing parents) when needed.
    """
    fig, axes = plt.subplots(1, len(anns), sharey=False)

    # Making axes iterable if only single annotation is present
    if len(anns) == 1:
        axes = [axes]

    for ann, name, ax in zip(anns, names, axes):
        ann_df = pd.DataFrame(ann.anns).transpose()
        if 'category_name' not in ann_df.columns:
            # Add a new column -> category name
            ann_df['category_name'] = ann_df.apply(
                lambda row: ann.cats[row.category_id]['name'], axis=1)

        chart = sns.countplot(data=ann_df,
                              x='category_name',
                              order=ann_df['category_name'].value_counts().index,
                              palette='Set1',
                              ax=ax)

        chart.set_title(name)
        chart.set_xticklabels(chart.get_xticklabels(), rotation=90)

        if show_count:
            for p in chart.patches:
                height = p.get_height()
                chart.text(p.get_x() + p.get_width() / 2.,
                           height + 0.9,
                           height,
                           ha="center")

    plt.suptitle('Instances per category', fontsize=14, fontweight='bold')
    plt.tight_layout()

    fig = plt.gcf()
    fig.set_size_inches(11, 11)

    if save:
        out_dir = os.path.join(os.getcwd(), 'results', 'plots')
        # makedirs also creates the missing 'results' parent, which
        # os.mkdir cannot do, and tolerates an existing directory.
        os.makedirs(out_dir, exist_ok=True)
        plt.savefig(os.path.join(out_dir, "cat_dist" + ".png"),
                    bbox_inches='tight',
                    pad_inches=0,
                    dpi=fig.dpi)

    plt.show()