Python seaborn.pairplot() Examples

The following are 20 code examples of seaborn.pairplot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: stock_visualizer.py    From stock-analysis with MIT License 6 votes vote down vote up
def jointplot(self, other, column, **kwargs):
    """
    Draw a seaborn jointplot of one column of this asset against the
    same column of another asset.

    Parameters:
        - other: The other asset's dataframe
        - column: The column name to plot on both axes.
        - kwargs: Keyword arguments to pass down to `sns.jointplot()`

    Returns:
        The object returned by `sns.jointplot()`
    """
    # x comes from this asset, y from the asset being compared against
    own_values = self.data[column]
    other_values = other[column]
    return sns.jointplot(x=own_values, y=other_values, **kwargs)
Example #2
Source File: stock_visualizer.py    From stock-analysis with MIT License 6 votes vote down vote up
def pairplot(self, **kwargs):
    """
    Draw a seaborn pairplot for this asset group.

    The data is pivoted first so that every distinct value of the
    `name` column becomes its own column of `close` values; the
    diagonal of the plot uses KDEs.

    Parameters:
        - kwargs: Keyword arguments to pass down to `sns.pairplot()`

    Returns:
        The object returned by `sns.pairplot()`
    """
    close_by_name = self.data.pivot_table(
        values='close', index=self.data.index, columns='name'
    )
    return sns.pairplot(close_by_name, diag_kind='kde', **kwargs)
Example #3
Source File: clustering_kmeans_search_alternative.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0 6 votes vote down vote up
def plotInteractions(data, n_clusters):
    '''
        Plot the interactions between variables, coloured by the
        cluster each row was assigned to, and save the figure.

        Note: data is modified in place -- a 'clus' column holding
        the labels returned by findClusters_kmeans is added (or
        overwritten).
    '''
    # cluster the data
    cluster = findClusters_kmeans(data, n_clusters)

    # append the labels to the dataset for ease of plotting
    data['clus'] = cluster.labels_

    # prepare the plot from the dataset that was passed in; the
    # previous version read a name (`selected`) that is not defined
    # in this function, so it only worked if a matching global existed
    grid = sns.pairplot(data, hue='clus')

    # and save the figure
    grid.savefig(
        '../../Data/Chapter04/k_means_{0}_clusters.png'
        .format(n_clusters)
    )


# the file name of the dataset 
Example #4
Source File: document.py    From DQLearning-Toolbox with MIT License 5 votes vote down vote up
def savePair(df,samplesize=20000):
    """Save a pair plot of a random sample of df to pair_path.

    Parameters:
        df: DataFrame to plot.
        samplesize: Maximum number of rows to sample. When df has
            fewer rows than this, all of its rows are used.
    """
    # DataFrame.sample() raises ValueError when asked for more rows than
    # the frame holds (sampling is without replacement), so cap the size.
    n_rows = min(samplesize, len(df))
    sampled = df.sample(n_rows)
    sns.set(style="ticks")
    sns.set_context("paper")
    sns.pairplot(sampled)
    plt.title('Pair Graph')
    plt.savefig(pair_path)

# Plot the moving-average chart (default order: 12)
Example #5
Source File: EDA.py    From exploripy with MIT License 5 votes vote down vote up
def ScatterPlot(self):
    """Render a regression pair plot of the continuous features to a PNG.

    Rows containing missing values in self.ContinuousFeatures are dropped
    before plotting. The image is written next to this module under
    'HTMLTemplate/dist/output/Scatter.png'. When self.debug == 'YES' the
    elapsed time is printed.

    Returns:
        The path of the PNG file that was written.
    """
    start = time.time()
    sns.set(style="ticks", color_codes=True)
    this_dir, _ = os.path.split(__file__)
    OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/Scatter.png')
    # sns.pairplot builds its own figure, so no plt.subplots() call is made
    # beforehand: that only created an empty figure which was never used or
    # closed.
    sns.pairplot(
        self.df[self.ContinuousFeatures].dropna(),
        markers="+",
        palette="husl",
        kind="reg",
        plot_kws={'line_kws': {'color': 'orange'}},
    )
    # the pairplot figure is the current pyplot figure at this point
    plt.savefig(OutFileName)
    end = time.time()
    if self.debug == 'YES':
        print('ScatterPlot',end-start)
    return OutFileName
Example #6
Source File: poiRegression.py    From python-urbanPlanning with MIT License 5 votes vote down vote up
def basicStat(dataBunch):
    """Show a pair plot and a correlation heat map of the POI columns.

    Parameters:
        dataBunch: object whose `.data` attribute holds the values, one
            column per entry of `cols` below (11 columns).

    Two figures are displayed in turn with plt.show(); nothing is returned.
    """
    sns.set(style='whitegrid',context='notebook')
    # column labels used for the data frame
    cols=['lat','lng','price','overall_rating','service_rating','facility_rating','hygiene_rating','image_num','comment_num','favorite_num','checkin_num']
    # wrap the data in a pandas frame, which is easier to inspect and slice
    frame=pd.DataFrame(dataBunch.data[:],columns=cols)
    # pairwise scatter plots, to look at the relationships between columns
    # (`height` is the current name of the deprecated `size` argument)
    sns.pairplot(frame[cols],height=2.5)
    plt.show()

    # pairwise correlation coefficients between the columns
    cm=np.corrcoef(frame[cols].values.T)
    sns.set(font_scale=1.3)
    # heat map of the correlation coefficients
    sns.heatmap(cm,cbar=True,annot=True,square=True,fmt='.2f',annot_kws={'size':13},yticklabels=cols,xticklabels=cols)
    # the call parentheses were missing here, so the heat map was never shown
    plt.show()
Example #7
Source File: reduce_iris_sample_size_lvq.py    From neupy with MIT License 5 votes vote down vote up
def plot_scattermatrix(data, target):
    """Return a seaborn pair plot of `data`, coloured by `target`.

    `data` is wrapped in a DataFrame, `target` is attached as a column
    named 'target' and used as the hue; the diagonal shows histograms.
    """
    labelled = pd.DataFrame(data).assign(target=target)
    return sns.pairplot(data=labelled, hue='target', diag_kind='hist')
Example #8
Source File: scrap_log.py    From ffjord with MIT License 5 votes vote down vote up
def plot_pairplot(csv_filename, fig_filename, top=None):
    """Read a CSV file and save a regression pair plot of its columns.

    csv_filename: path of the CSV file to read.
    fig_filename: where the figure is saved.
    top: when not None, only the first `top` rows are plotted.
    """
    import pandas as pd
    import seaborn as sns

    sns.set(style="ticks", color_codes=True)
    table = pd.read_csv(csv_filename)
    rows = table if top is None else table[:top]

    grid = sns.pairplot(rows, kind='reg', diag_kind='kde', markers='.')
    grid.savefig(fig_filename)
Example #9
Source File: visualization.py    From default-credit-card-prediction with MIT License 5 votes vote down vote up
def visualize_hist_pairplot(X,y,selected_feature1,selected_feature2,features,diag_kind):
    """
    Visualize the pairwise relationships (Histograms and Density Functions) between classes and two selected attributes

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature1 -- First feature
    selected_feature2 -- Second feature
    features -- Column names for X and y stacked side by side; the plot
                uses the column named "Y" as hue (presumably the name
                given to y -- confirm against the caller)
    diag_kind -- Type of plot in the diagonal (Histogram or Density Function)
    """

    # one table: the feature columns followed by the target column
    frame = pd.DataFrame(data=np.column_stack((X, y)), columns=features)

    # plot, giving class 0 and class 1 fixed colours from the HLS palette
    hls = sea.hls_palette()
    class_colors = {0: hls[2], 1: hls[0]}
    grid = sea.pairplot(
        frame,
        hue="Y",
        palette=class_colors,
        vars=[selected_feature1, selected_feature2],
        diag_kind=diag_kind,
    )
    title = 'Pairwise relationship: '+selected_feature1+" vs "+selected_feature2
    grid.fig.suptitle(title)
    grid.set(xticklabels=[])

    # save fig
    output_dir = "img"
    target = '{}/{}_{}_hist_pairplot.png'.format(output_dir, selected_feature1, selected_feature2)
    save_fig(output_dir, target)
Example #10
Source File: plotfunctions.py    From DataScience-webapp-with-flask with MIT License 5 votes vote down vote up
def plot_correlations(ds, corr, corrcat):
    """Render a pair plot of selected columns as a base64-encoded PNG.

    Parameters:
        ds: data set supporting `ds[corr]` selection.
        corr: column selection passed to `ds[...]`.
        corrcat: name of the column used as hue, or '' for no hue.
            NOTE(review): the hue column is looked up in `ds[corr]`, so
            it presumably has to be part of `corr` -- confirm with caller.

    Returns:
        bytes: the PNG image, base64 encoded.
    """
    from io import BytesIO
    import base64

    sns.set()
    plt.gcf().clear()
    if corrcat != '':
        sns.pairplot(ds[corr], hue=corrcat)
    else:
        sns.pairplot(ds[corr])

    figfile = BytesIO()
    try:
        # the pairplot figure is the current pyplot figure
        plt.savefig(figfile, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leave one more figure in memory
        plt.close()
    return base64.b64encode(figfile.getvalue())
Example #11
Source File: vis_corex.py    From bio_corex with Apache License 2.0 5 votes vote down vote up
def _save_pairplot(frame, filename, title, **pairplot_kws):
    """Draw one pair plot of frame, title it and save it to filename (best effort)."""
    import warnings

    try:
        sns.pairplot(frame, **pairplot_kws)
        out_dir = os.path.dirname(filename)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        plt.suptitle(title, y=1.01)
        plt.savefig(filename, bbox_inches='tight')
    except Exception as err:
        # Plotting stays best effort (one bad group must not stop the
        # others), but the failure is reported instead of silently dropped.
        warnings.warn('Could not write {}: {}'.format(filename, err))
    finally:
        # release the figure(s) whether or not the plot succeeded
        plt.close('all')


def plot_pairplots(data, labels, alpha, mis, column_label, topk=5, prefix='', focus=''):
    """Save two pair plots for each row of `mis`.

    For every row j, the columns with alpha[j] > 0 and mis[j] > 0 are ranked
    by alpha * mis (largest first) and the first `topk` are kept; the column
    named `focus` is put in front when it exists in `column_label` and was
    not selected. Rows with fewer than two selected columns are skipped.

    Two files are written per remaining row:
      - '{prefix}/pairplots_regress/group_num={j}.pdf': regression pair plot
      - '{prefix}/pairplots/group_num={j}.pdf': scatter pair plot coloured
        by labels[:, j]

    Parameters:
        data: 2-D array indexed as data[:, column_indices].
        labels: 2-D array; labels[:, j] is used as hue for row j.
        alpha, mis: 2-D arrays indexed as [j, column].
        column_label: list of column names.
        topk: maximum number of ranked columns to plot.
        prefix: output directory prefix.
        focus: optional column name to always include.

    A plot that fails is skipped with a warning. Sets the global matplotlib
    font size to 32 and closes all pyplot figures after each plot.
    """
    plt.rcParams.update({'font.size': 32})
    m, _ = mis.shape
    for j in range(m):
        inds = np.where(np.logical_and(alpha[j] > 0, mis[j] > 0.))[0]
        inds = inds[np.argsort(- alpha[j, inds] * mis[j, inds])][:topk]
        if focus in column_label:
            ifocus = column_label.index(focus)
            if ifocus not in inds:
                inds = np.insert(inds, 0, ifocus)
        if len(inds) < 2:
            continue

        columns = [column_label[i] for i in inds]
        subdata = pd.DataFrame(data=data[:, inds], columns=columns)
        title = "Latent factor {}".format(j)

        _save_pairplot(
            subdata,
            '{}/pairplots_regress/group_num={}.pdf'.format(prefix, j),
            title,
            kind="reg", diag_kind="kde", height=5, dropna=True,
        )

        subdata['Latent factor'] = labels[:,j]
        _save_pairplot(
            subdata,
            '{}/pairplots/group_num={}.pdf'.format(prefix, j),
            title,
            kind="scatter", dropna=True,
            vars=subdata.columns.drop('Latent factor'),
            hue="Latent factor", diag_kind="kde", height=5,
        )
Example #12
Source File: plotting.py    From kvae with MIT License 5 votes vote down vote up
def plot_auxiliary(all_vars, filename, table_size=4):
    """Plot a collection of sequence variables and save the figure.

    Parameters:
        all_vars: sequence of numpy arrays. 3-D arrays are indexed as
            (batch, step, dimension); 2-D arrays are given a leading batch
            axis of length 1. The caller's sequence is not modified.
        filename: where the figure is saved.
        table_size: number of rows and columns of the trajectory grid
            used when the last dimension is 2.

    When the last dimension of the first array is 2, panel number k of a
    table_size x table_size grid shows batch element k of every array as a
    2-D trajectory, with its first point marked in red; arrays with fewer
    than k + 1 batch elements are skipped for that panel. Otherwise all
    arrays are flattened to rows of `dim` values and drawn as one seaborn
    pair plot, coloured by the position of the array in `all_vars`.
    """
    # All variables need to be (batch_size, sequence_length, dimension).
    # A new list is built so the caller's list is left untouched.
    arrays = [np.expand_dims(a, 0) if a.ndim == 2 else a for a in all_vars]

    dim = arrays[0].shape[-1]
    if dim == 2:
        # squeeze=False keeps `ax` two-dimensional even for table_size == 1
        f, ax = plt.subplots(table_size, table_size, sharex='col', sharey='row',
                             figsize=[12, 12], squeeze=False)
        for x in range(table_size):
            for y in range(table_size):
                idx = x * table_size + y
                for a in arrays:
                    if idx >= a.shape[0]:
                        # batch smaller than the grid: leave the panel empty
                        continue
                    ax[x, y].plot(a[idx, :, 0], a[idx, :, 1], linestyle='-', marker='o', markersize=3)
                    # Plot starting point of the trajectory
                    ax[x, y].plot(a[idx, 0, 0], a[idx, 0, 1], 'r.', ms=12)
        plt.savefig(filename, format='png', bbox_inches='tight', dpi=80)
        # close exactly the figure created here (a second bare plt.close()
        # could close an unrelated figure owned by the caller)
        plt.close(f)
    else:
        df_list = []
        for i, a in enumerate(arrays):
            df = pd.DataFrame(a.reshape(-1, dim))
            df['class'] = i
            df_list.append(df)

        df_all = pd.concat(df_list)
        sns_plot = sns.pairplot(df_all, hue="class", vars=range(dim))
        sns_plot.savefig(filename)
        # the pairplot figure is the current pyplot figure
        plt.close()
Example #13
Source File: atlas3.py    From ssbio with MIT License 5 votes vote down vote up
def make_pairplot(self, num_components_to_plot=4, outpath=None, dpi=150):
    """Draw a pair plot of the first principal-observation columns.

    Parameters:
        num_components_to_plot: how many leading columns of
            self.principal_observations_df are plotted against each other.
        outpath: when given, the figure is saved there; otherwise it is
            shown with plt.show().
        dpi: resolution used when saving.

    The plot is coloured by self.observation_colname, uses self.markers
    and is titled with self.plot_title. The current figure is closed
    afterwards.
    """
    # Get columns
    all_columns = self.principal_observations_df.columns
    components_to_plot = [all_columns[x] for x in range(num_components_to_plot)]

    # Plot (`height` is the current name of the deprecated `size` argument)
    plot = sns.pairplot(data=self.principal_observations_df, hue=self.observation_colname,
                        vars=components_to_plot, markers=self.markers, height=4)
    plt.subplots_adjust(top=.95)
    plt.suptitle(self.plot_title)

    if outpath:
        plot.fig.savefig(outpath, dpi=dpi)
    else:
        plt.show()
    plt.close()
Example #14
Source File: stock_visualizer.py    From stock-analysis with MIT License 5 votes vote down vote up
def pairplot(self, **kwargs):
    """
    Draw a seaborn pairplot of this asset's data.

    Parameters:
        - kwargs: Keyword arguments to pass down to `sns.pairplot()`

    Returns:
        The object returned by `sns.pairplot()`
    """
    asset_data = self.data
    return sns.pairplot(asset_data, **kwargs)
Example #15
Source File: stock_visualizer.py    From stock-analysis with MIT License 5 votes vote down vote up
def pairplot(self, **kwargs):
    """Placeholder for pairplot generation; subclasses must override it."""
    message = 'To be implemented by subclasses!'
    raise NotImplementedError(message)
Example #16
Source File: plots.py    From AlphaPy with Apache License 2.0 4 votes vote down vote up
def plot_scatter(df, features, target, tag='eda', directory=None):
    r"""Plot a scatterplot matrix, also known as a pair plot.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the features.
    features: list of str
        The features to compare in the scatterplot. The list is not
        modified.
    target : str
        The target variable for contrast.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    https://seaborn.pydata.org/examples/scatterplot_matrix.html

    """

    logger.info("Generating Scatter Plot")

    # Get the feature subset. Work on a copy: appending to the caller's
    # list would grow it by one entry on every call and select the target
    # column more than once.

    columns = list(features)
    if target not in columns:
        columns.append(target)
    df = df[columns]

    # Generate the pair plot

    sns.set()
    sns_plot = sns.pairplot(df, hue=target)

    # Save the plot
    write_plot('seaborn', sns_plot, 'scatter_plot', tag, directory)


#
# Function plot_facet_grid
# 
Example #17
Source File: visualize.py    From pipelines with Apache License 2.0 4 votes vote down vote up
def datahtml(
    bucket_name,
    commit_sha,
    train_file_path
):
    """Publish a pair plot of the training data as a small HTML page.

    Parameters:
        bucket_name: storage location; a leading 'gs://' is removed when
            building the public image URL.
        commit_sha: path component under which the image is stored.
        train_file_path: CSV file read with pandas.

    Steps: the selected columns are plotted and saved locally as
    'visualization.png', the image is copied to
    <bucket_name>/<commit_sha>/visualization.png, an HTML page embedding
    the image URL is written to <bucket_name>/kaggle.html, and a metadata
    JSON pointing at that page is written to
    '/mlpipeline-ui-metadata.json'.
    """
    import json
    import seaborn as sns
    import matplotlib.pyplot as plt
    import os

    # str.lstrip('gs://') removes any leading run of the characters
    # 'g', 's', ':' and '/', so it would also eat the start of a bucket
    # name such as 'gs://storage-x'. Remove the exact prefix instead.
    gcs_scheme = 'gs://'
    if bucket_name.startswith(gcs_scheme):
        bucket_without_scheme = bucket_name[len(gcs_scheme):]
    else:
        bucket_without_scheme = bucket_name

    image_path = os.path.join(bucket_name, commit_sha, 'visualization.png')
    image_url = os.path.join('https://storage.googleapis.com', bucket_without_scheme, commit_sha, 'visualization.png')
    html_path = os.path.join(bucket_name, 'kaggle.html')
    # output visualization to a file

    import pandas as pd
    df_train = pd.read_csv(train_file_path)
    sns.set()
    cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
    # `height` is the current name of the deprecated `size` argument
    sns.pairplot(df_train[cols], height = 3)
    plt.savefig('visualization.png')
    from tensorflow.python.lib.io import file_io
    file_io.copy('visualization.png', image_path)
    rendered_template = """
    <html>
        <head>
            <title>correlation image</title>
        </head>
        <body>
            <img src={}>
        </body>
    </html>""".format(image_url)
    file_io.write_string_to_file(html_path, rendered_template)

    metadata = {
        'outputs' : [{
        'type': 'web-app',
        'storage': 'gcs',
        'source': html_path,
        }]
    }
    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)
Example #18
Source File: plotUtils.py    From pyodds with MIT License 4 votes vote down vote up
def visualize_outlierscore(value,label,contamination,path=None):
    """
    Scatter-plot the outlier score of every sample with two threshold lines.

    Parameters
    ----------
    value: numpy array of shape (n_test, )
        The outlier score of the test data.
    label: numpy array of shape (n_test, )
        The label of test data produced by the algorithm. Entries equal
        to 1 are drawn as inliers, everything else as outliers.
    contamination : float in (0., 0.5), optional (default=0.1)
        The proportion of outliers in the data set. The two horizontal
        lines are drawn at the sorted scores found at the fractions
        (1 - contamination) and contamination.
    path: string
        The saving path for result figures. Nothing is saved when empty.
    """

    sns.set(style="darkgrid")

    ts = np.arange(len(value))
    # map the numeric labels to the two names used for colouring
    outlier_label = ['inlier' if label[i] == 1 else 'outlier' for i in range(len(ts))]
    X_outlier = pd.DataFrame({'ts':ts,'Outlier_score':value,'outlier_label':np.array(outlier_label)})
    colors = dict(inlier="#4CB391", outlier="gray")
    grid = sns.FacetGrid(X_outlier, hue="outlier_label", palette=colors, height=5)
    grid.map(plt.scatter, "ts", "Outlier_score", s=30, alpha=.7, linewidth=.5, edgecolor="white")

    ranking = np.sort(value)
    last_x = len(X_outlier)-1
    # upper line first, then the lower one
    for fraction, line_name in ((1 - contamination, 'Threshold'), (contamination, 'Threshold2')):
        threshold = ranking[int(fraction * len(ranking))]
        plt.hlines(threshold, xmin=0, xmax=last_x, colors="g", zorder=100, label=line_name)
    if path:
        plt.savefig(path+'/visualize_outlierscore.png')
    plt.show()



# def visualize_outlierresult(X,label,path=None):
#     """
#     Visualize the predicted outlier result.
#
#     Parameters
#     ----------
#     X: numpy array of shape (n_test, n_features)
#         The test data.
#     label: numpy array of shape (n_test, )
#         The label of test data produced by the algorithm.
#
#     """
#     X['outlier']=pd.Series(label)
#     pal = dict(inlier="#4CB391", outlier="gray")
#     g = sns.pairplot(X, hue="outlier", palette=pal)
#     if path:
#         plt.savefig(path+'/visualize_outlierresult.png')
#     plt.show() 
Example #19
Source File: sampling.py    From pyPESTO with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def sampling_scatter(
        result: Result,
        i_chain: int = 0,
        stepsize: int = 1,
        suptitle: str = None,
        size: Tuple[float, float] = None):
    """Pairwise scatter plot of the sampled parameters.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.
    i_chain:
        Which chain to plot. Default: First chain.
    stepsize:
        Only one in `stepsize` values is plotted.
    suptitle:
        Figure super title.
    size:
        Figure size in inches.

    Returns
    -------
    ax:
        The object returned by `sns.pairplot`.
    """

    # only the second of the four returned values is needed here
    plot_data = get_data_to_plot(
        result=result, i_chain=i_chain, stepsize=stepsize)
    _, params_fval, _, _ = plot_data

    sns.set(style="ticks")

    # plot the parameters only, without the bookkeeping columns
    parameters_only = params_fval.drop(
        ['logPosterior', 'iteration'], axis=1)
    ax = sns.pairplot(parameters_only)

    figure = ax.fig
    if size is not None:
        figure.set_size_inches(size)

    if suptitle:
        figure.suptitle(suptitle)

    return ax
Example #20
Source File: action.py    From insightconnect-plugins with MIT License 4 votes vote down vote up
def run(self, params={}):
    """Create a seaborn pair plot from base64-encoded CSV data.

    Keys read from params:
        - 'csv_data': base64-encoded CSV text
        - 'kind': forwarded to `sns.pairplot()` as `kind`
        - 'hue': optional column name used as hue
        - 'color_palette': passed to `sns.set_palette()`
        - 'margin_style': passed to `sns.set()` as `style`

    Returns:
        dict with 'csv' (the input 'csv_data', unchanged) and 'plot'
        (the PNG image, base64 encoded, as a str).

    Raises:
        Exception: when 'csv_data' cannot be base64-decoded (the original
            error is re-raised) or when the hue column is not in the data.

    The image is written to 'plot.png' in the working directory on the
    way.
    """
    # Set styles. sns.set() also resets the palette to seaborn's default,
    # so the requested palette has to be applied after it, not before.
    sns.set(style=params.get('margin_style'))
    sns.set_palette(params.get('color_palette'))

    # Process the data and create the plot
    try:
        decoded_data = base64.b64decode(params.get('csv_data'))
    except Exception as e:
        error = f"Failed to decode base64 encoded CSV data with error: {e}"
        self.logger.error(error)
        raise

    df = pd.read_csv(BytesIO(decoded_data))
    kind = params.get('kind')
    hue = params.get('hue')

    args = {
        "kind": kind
    }

    if hue:
        if hue not in df:
            error = f"Column for hue ({hue}) not in data set, cannot create plot..."
            self.logger.error(error)
            # raise the error; returning the Exception object handed the
            # caller an exception instance where a result dict is expected
            raise Exception(error)
        args['hue'] = hue

    # Pairgrids have the savefig method, call it directly
    self.logger.info("Creating plot...")
    plot = sns.pairplot(df, **args)

    # bbox_inches is required to ensure that labels are not cut off
    plot.savefig('plot.png', bbox_inches="tight")
    with open('plot.png', 'rb') as f:
        encoded_plot = base64.b64encode(f.read())

    return {
        "csv": params.get('csv_data'),
        "plot": encoded_plot.decode('utf-8')
    }