Python matplotlib.pyplot.boxplot() Examples

The following are 30 code examples of matplotlib.pyplot.boxplot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module matplotlib.pyplot, or try the search function.

Example #1

Source File: Flight Analysis.py From Cheapest-Flights-bot with MIT License

6 votes

def task_3_IQR(flight_data):
    """Box-plot the ``Price`` column, save the figure and return the flier rows.

    The plot is styled artist by artist and written to ``task_3_iqr.png``.

    Returns a DataFrame with the ``Price`` and ``Date_of_Flight`` of every row
    whose price equals one of the flier (outlier) values drawn by the box plot.
    """
    plot = plt.boxplot(flight_data['Price'], patch_artist=True)

    outline = dict(color='#7570b3', linewidth=2)
    for artist in plot['medians']:
        artist.set(color='#fc0004', linewidth=2)
    for artist in plot['fliers']:
        artist.set(marker='+', color='#e7298a')
    for group in ('whiskers', 'caps'):
        for artist in plot[group]:
            artist.set(**outline)
    for patch in plot['boxes']:
        patch.set(**outline)
        # Applied after the outline colour so the fill keeps its own colour.
        patch.set(facecolor='#1b9e77')
    plt.savefig('task_3_iqr.png')

    # Only one data set is plotted, so the first flier artist holds all outliers.
    outlier_prices = plot['fliers'][0].get_ydata()
    is_outlier = flight_data['Price'].isin(outlier_prices)
    clean_data = [
        [row['Price'], row['Date_of_Flight']]
        for _, row in flight_data.loc[is_outlier].iterrows()
    ]
    return pd.DataFrame(clean_data, columns=['Price', 'Date_of_Flight'])

Example #2

Source File: vis.py From IRCLogParser with GNU General Public License v3.0

6 votes

def box_plot(data, output_directory, output_file_name):
    """
        Draws a box plot of ``data`` and saves it as a PNG image.

    Args:
        data (list):  data handed to ``plt.boxplot``
        output_directory(str): location to save graph
        output_file_name(str): name of the image file to be saved
            (the ".png" extension is appended here)

    Returns:
        None
    """
    plt.figure()
    plt.boxplot(data)

    plt.legend()
    saver.check_if_dir_exists(output_directory)
    target_path = output_directory + "/" + output_file_name + ".png"
    plt.savefig(target_path)
    plt.close()

Example #3

Source File: run_analysis.py From automl_benchmark with MIT License

6 votes

def boxplot_viz(clean_df, target):
    """Save a horizontal, notched box plot with one box per model.

    ``clean_df[target]`` is grouped by its index values (the models); each
    model becomes one box, coloured from a generated colour scale and listed
    in the legend.  The figure is written to
    ``figures/RawDataBoxPlot<target>.pdf``.
    """
    scores = clean_df[target]
    models = pd.unique(scores.index.values)
    n_models = len(models)
    # One column per model: boxplot treats every column as a separate data set.
    data_arr = np.array([scores[name].values for name in models]).T
    base_colors = [
        hsl2hex(colour)
        for colour in color_scale((0., 0.8, 0.6), (0.8, 0.8, 0.6), n_models)
    ]

    plt.figure(figsize=(7, 3.5))
    task_kind = 'Classification' if target == 'F1_SCORE' else 'Regression'
    plt.title("Raw Per Model {} Comparison ({})".format(task_kind, target), size=12)
    bplot = plt.boxplot(
        data_arr,
        vert=False,
        patch_artist=True,
        notch=True,
        labels="    ",
        # Positions count down so the first model is drawn at the top.
        positions=list(range(n_models, 0, -1)),
    )

    for patch, colour in zip(bplot['boxes'], base_colors):
        patch.set_facecolor(colour)

    plt.legend(bplot['boxes'], models, loc='lower left', prop={'size': 8}, fancybox=True, framealpha=0.6)
    plt.setp(bplot['fliers'], markeredgecolor='grey')
    plt.setp(bplot['medians'], color='black')

    # plt.show()
    output_path = 'figures/RawDataBoxPlot{}.pdf'.format(target)
    plt.savefig(output_path, dpi=plt.gcf().dpi, transparent=True)

Example #4

Source File: report.py From wub with Mozilla Public License 2.0

6 votes

def plot_boxplots(self, data_map, title="", xlab="", ylab="", xticks_rotation=0, xticks_fontsize=5):
        """Plot one box per entry of a label -> data mapping.

        :param self: object.
        :param data_map: A dictionary with labels as keys and lists as data values.
        :param title: Figure title.
        :param xlab: X axis label.
        :param ylab: Y axis label.
        :param xticks_rotation: Rotation value for x tick labels.
        :param xticks_fontsize: Fontsize for x tick labels.
        :returns: None
        :rtype: None
        """
        # Materialise the labels: a dict view is not a real sequence, and the
        # data values are already passed as a list below.
        labels = list(data_map.keys())
        fig = plt.figure()
        plt.boxplot(list(data_map.values()))
        # Boxes are drawn at positions 1..n, so the ticks start at 1 as well.
        plt.xticks(np.arange(1, len(labels) + 1), labels, rotation=xticks_rotation, fontsize=xticks_fontsize)
        self._set_properties_and_close(fig, title, xlab, ylab)

Example #5

Source File: analyse_results2.py From YAFS with MIT License

6 votes

def drawBoxPlot_Both_USER(app,dr,drILP):
    """Draw paired per-user box plots for one app and save them as a PNG.

    ``dr`` and ``drILP`` are filtered on their ``app`` column; the entries of
    their ``r`` column are plotted side by side (legend entries 'Partition'
    and 'ILP').  The figure is saved to ``pathSimple + "app<app>.png"``.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    fig, ax = plt.subplots()
    data_a = dr[dr.app == app].r.values
    data_b = drILP[drILP.app == app].r.values
    ticks = list(np.sort(dr[dr.app == app].user.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac') # colors are from http://colorbrewer2.org/
    set_box_color(bpI, '#d8b365')

    # Empty line plots exist only to give the legend one entry per colour.
    plt.plot([], c='#5ab4ac', label='Partition')
    plt.plot([], c='#d8b365', label='ILP')
    plt.legend()

    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks)*2)
    ax.set_title('App: %i'%app)
    ax.set_ylabel('Time Response')
    ax.set_xlabel('User')
    plt.tight_layout()
    plt.savefig(pathSimple+"app%i.png"%app)

Example #6

Source File: analyse_results2.py From YAFS with MIT License

6 votes

def drawBoxPlot_App(dar,darILP,labeldar="Partition",labelILP="ILP"):
    """Draw paired per-app box plots for two result sets on a new figure.

    The entries of the ``r`` column of ``dar`` and ``darILP`` are plotted side
    by side; ``labeldar`` and ``labelILP`` are the legend labels.  The figure
    is neither shown nor saved here.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    fig, ax = plt.subplots()
    # NOTE: an earlier groupby-based way of building data_a did not work:
    # data_a = dr.groupby(["app"]).agg({"values": lambda x: list(x.sum())})
    data_a = dar.r.values
    data_b = darILP.r.values
    ticks = list(np.sort(dar.app.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac') # colors are from http://colorbrewer2.org/
    set_box_color(bpI, '#d8b365')

    # Empty line plots exist only to give the legend one entry per colour.
    plt.plot([], c='#5ab4ac', label=labeldar)
    plt.plot([], c='#d8b365', label=labelILP)
    plt.legend()

    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks)*2)
    ax.set_title('All Apps')
    ax.set_ylabel('Time Response')
    ax.set_xlabel('App')
    plt.tight_layout()

Example #7

Source File: base_backend.py From delira with GNU Affero General Public License v3.0

6 votes

def _boxplot(self, plot_kwargs=None, figure_kwargs=None, **kwargs):
        """
        Create a boxplot inside a managed figure context

        Parameters
        ----------
        plot_kwargs : dict
            keyword arguments forwarded to ``matplotlib.pyplot.boxplot``;
            ``None`` is treated as an empty dict
        figure_kwargs : dict
            arguments handed to the figure manager to create the figure;
            ``None`` is treated as an empty dict
        **kwargs :
            additional keyword arguments handed to the figure manager
            (used for pushing the created figure to the logging writer)

        """
        plot_kwargs = {} if plot_kwargs is None else plot_kwargs
        figure_kwargs = {} if figure_kwargs is None else figure_kwargs

        with self.FigureManager(self._figure, figure_kwargs, kwargs):
            # Imported lazily, only once the figure context is active.
            from matplotlib.pyplot import boxplot
            boxplot(**plot_kwargs)

Example #8

Source File: plotting.py From snn_toolbox with MIT License

6 votes

def plot_weight_distribution(path, model):
    """Save per-layer box plots of the model's weights and biases.

    ``model.get_weights()`` is split by position: even-indexed entries are
    plotted as weights, odd-indexed entries as biases.  Two figures are
    written into ``path``: 'weight_distribution' and 'bias_distribution'.
    """
    parameters = model.get_weights()
    # (arrays, extra boxplot kwargs, y-axis label, output file stem)
    panels = (
        (parameters[0::2], {'whis': 15}, "Weight value", 'weight_distribution'),
        (parameters[1::2], {}, "Bias value", 'bias_distribution'),
    )

    for arrays, box_kwargs, y_label, file_stem in panels:
        plt.figure(figsize=(15, 10))
        # Every layer is flattened so it contributes exactly one box.
        plt.boxplot([np.ravel(values) for values in arrays], **box_kwargs)
        plt.xlabel("Layer index")
        plt.ylabel(y_label)
        plt.savefig(os.path.join(path, file_stem))

Example #9

Source File: analyse_results_debug.py From YAFS with MIT License

6 votes

def drawBoxPlot_App(dar,darILP,labeldar="Partition",labelILP="ILP"):
    """Draw paired per-app box plots for two result sets on a new figure.

    The entries of the ``r`` column of ``dar`` and ``darILP`` are plotted side
    by side; ``labeldar`` and ``labelILP`` are the legend labels.  The figure
    is neither shown nor saved here.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    fig, ax = plt.subplots()
    # NOTE: an earlier groupby-based way of building data_a did not work:
    # data_a = dr.groupby(["app"]).agg({"values": lambda x: list(x.sum())})
    data_a = dar.r.values
    data_b = darILP.r.values
    ticks = list(np.sort(dar.app.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac') # colors are from http://colorbrewer2.org/
    set_box_color(bpI, '#d8b365')

    # Empty line plots exist only to give the legend one entry per colour.
    plt.plot([], c='#5ab4ac', label=labeldar)
    plt.plot([], c='#d8b365', label=labelILP)
    plt.legend()

    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks)*2)
    ax.set_title('All Apps')
    ax.set_ylabel('Time Response')
    ax.set_xlabel('App')
    plt.tight_layout()

Example #10

Source File: analyse_results_debug.py From YAFS with MIT License

6 votes

def drawBoxPlot_Both_USER_ax(app,dr,drILP,ax):
    """Draw paired per-user box plots for one app on an existing axes.

    ``dr`` and ``drILP`` are filtered on their ``app`` column and the entries
    of their ``r`` column are plotted side by side on ``ax`` with thick lines.
    Two empty labelled lines ('Partition', 'ILP') are added so the caller can
    build a legend; no legend is created here.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    data_a = dr[dr.app == app].r.values
    data_b = drILP[drILP.app == app].r.values
    ticks = list(np.sort(dr[dr.app == app].user.unique()))

    def _thick_boxes(data, offset):
        # One slot of width 2 per box pair; ``offset`` shifts a series off-centre.
        # Fresh property dicts per call so nothing is shared between artists.
        return ax.boxplot(
            data,
            positions=np.arange(len(data)) * 2.0 + offset,
            sym='',
            widths=0.55,
            whiskerprops=dict(linewidth=2),
            boxprops=dict(linewidth=2),
            capprops=dict(linewidth=2),
            medianprops=dict(linewidth=2),
        )

    bpl = _thick_boxes(data_a, -0.4)
    bpI = _thick_boxes(data_b, 0.4)
    set_box_color(bpl, '#a6bddb')
    set_box_color(bpI, '#e34a33')

    ax.get_xaxis().set_ticks(range(0, len(ticks) * 2, 2))
    ax.set_xticklabels(ticks)
    ax.set_xlim(-2, len(ticks)*2)
    # Empty lines exist only to provide legend handles in the box colours.
    ax.plot([], c='#a6bddb', label="Partition",linewidth=3)
    ax.plot([], c='#e34a33', label="ILP",linewidth=3)

Example #11

Source File: analyse_results2.py From YAFS with MIT License

6 votes

def drawBoxPlot_App(dar,darILP,labeldar="Partition",labelILP="ILP"):
    """Draw paired per-app box plots for two result sets on a new figure.

    The entries of the ``r`` column of ``dar`` and ``darILP`` are plotted side
    by side; ``labeldar`` and ``labelILP`` are the legend labels.  The figure
    is neither shown nor saved here.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    fig, ax = plt.subplots()
    # NOTE: an earlier groupby-based way of building data_a did not work:
    # data_a = dr.groupby(["app"]).agg({"values": lambda x: list(x.sum())})
    data_a = dar.r.values
    data_b = darILP.r.values
    ticks = list(np.sort(dar.app.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac') # colors are from http://colorbrewer2.org/
    set_box_color(bpI, '#d8b365')

    # Empty line plots exist only to give the legend one entry per colour.
    plt.plot([], c='#5ab4ac', label=labeldar)
    plt.plot([], c='#d8b365', label=labelILP)
    plt.legend()

    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks)*2)
    ax.set_title('All Apps')
    ax.set_ylabel('Time Response')
    ax.set_xlabel('App')
    plt.tight_layout()

Example #12

Source File: analyse_results2.py From YAFS with MIT License

6 votes

def drawBoxPlot_Both_USER(app,dr,drILP):
    """Draw paired per-user box plots for one app and save them as a PNG.

    ``dr`` and ``drILP`` are filtered on their ``app`` column; the entries of
    their ``r`` column are plotted side by side (legend entries 'Partition'
    and 'ILP').  The figure is saved to ``pathSimple + "app<app>.png"``.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    fig, ax = plt.subplots()
    data_a = dr[dr.app == app].r.values
    data_b = drILP[drILP.app == app].r.values
    ticks = list(np.sort(dr[dr.app == app].user.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac') # colors are from http://colorbrewer2.org/
    set_box_color(bpI, '#d8b365')

    # Empty line plots exist only to give the legend one entry per colour.
    plt.plot([], c='#5ab4ac', label='Partition')
    plt.plot([], c='#d8b365', label='ILP')
    plt.legend()

    plt.xticks(range(0, len(ticks) * 2, 2), ticks)
    plt.xlim(-2, len(ticks)*2)
    ax.set_title('App: %i'%app)
    ax.set_ylabel('Time Response')
    ax.set_xlabel('User')
    plt.tight_layout()
    plt.savefig(pathSimple+"app%i.png"%app)

Example #13

Source File: utilities.py From EvaluatingDPML with MIT License

5 votes

def make_membership_box_plot(vector):
    """Show a log-scale box plot comparing the two halves of ``vector``.

    The first 10000 entries are labelled 'members', the remainder
    'non-members'.  Whiskers span the full data range.
    """
    # ``whis=(0, 100)`` (0th to 100th percentile) replaces ``whis='range'``,
    # which newer Matplotlib releases no longer accept.
    plt.boxplot(
        [vector[:10000], vector[10000:]],
        labels=['members', 'non-members'],
        whis=(0, 100),
    )
    plt.yscale('log')
    plt.ylabel('Per-Instance Loss')
    plt.show()

Example #14

Source File: relation.py From visualize_ML with MIT License

5 votes

def bivariate_analysis_catg_cont(catg_cont_list,df,target_name,sub_len,COUNTER,PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE):
    """Draw one subplot per column: box plots of the column split by target class.

    For every column in ``catg_cont_list`` a subplot is selected at index
    ``COUNTER`` in a ``PLOT_ROW_SIZE`` x ``PLOT_COLUMNS_SIZE`` grid, titled
    with the column's mean and std, and annotated with the ANOVA F value and
    p value of the column against the target.  Rows containing NaN are
    dropped first.  ``sub_len`` is accepted but not used.

    Returns the ``plt`` module and the updated ``COUNTER``.
    """
    # No need to remove string variables, as they are handled by the chi2
    # function of sklearn (original author's note).
    # clean_catg_cont_list = clean_str_list(df,catg_cont_list)
    clean_catg_cont_list = catg_cont_list
    clean_df = df.dropna()

    for col in clean_catg_cont_list:

        # NOTE(review): classes come from the unfiltered ``df``, so a class that
        # only occurs in dropped rows yields an empty box - confirm this is intended.
        col_classes = df[target_name].unique()

        # describe() output is read by position (1 -> mean, 2 -> std for numeric
        # columns); ``.iloc`` makes that explicit instead of relying on the
        # deprecated positional fallback of ``Series[int]``.
        summary = clean_df[col].describe()
        mean = summary.iloc[1]
        std = summary.iloc[2]

        plt.subplot(PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE,COUNTER)
        plt.title("mean "+str(np.float32(mean))+" std "+str(np.float32(std)),fontsize=10)

        x = [np.array(clean_df[clean_df[target_name]==i][col]) for i in col_classes]
        y = clean_df[target_name]

        f_value,p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1,1),y)

        plt.xlabel(target_name+"\n f_value: "+str(np.float32(f_value[0]))+" / p_val: "+str(p_val[0]), fontsize=10)
        plt.ylabel(col, fontsize=10)
        plt.boxplot(x)

        print (col+" vs "+target_name+" plotted....")

        COUNTER +=1

    return plt,COUNTER

#returns the total number of subplots to be made.

Example #15

Source File: utilities.py From EvaluatingDPML with MIT License

5 votes

def make_predictions_box_plot(vector, mem, pred_mem):
    """Show a log-scale box plot of ``vector`` split by prediction outcome.

    Entry ``i`` of ``vector`` goes into the TP / FN / FP / TN group according
    to ``mem[i]`` (1 = member) and ``pred_mem[i]`` (1 = predicted member).
    Whiskers span the full data range.
    """
    indices = range(len(vector))

    def _select(is_member, predicted_member):
        # Values whose true and predicted membership flags both match.
        return [vector[i] for i in indices
                if mem[i] == is_member and pred_mem[i] == predicted_member]

    tp_vec = _select(1, 1)
    fn_vec = _select(1, 0)
    fp_vec = _select(0, 1)
    tn_vec = _select(0, 0)
    # ``whis=(0, 100)`` (0th to 100th percentile) replaces ``whis='range'``,
    # which newer Matplotlib releases no longer accept.
    plt.boxplot([tp_vec, fn_vec, fp_vec, tn_vec], labels=['TP', 'FN', 'FP', 'TN'], whis=(0, 100))
    plt.yscale('log')
    plt.ylabel('Per-Instance Loss')
    plt.show()

Example #16

Source File: evaluate.py From 3DUnetCNN with MIT License

5 votes

def main():
    """Score every case folder under ``prediction/`` and plot the results.

    For each sub-directory, ``truth.nii.gz`` and ``prediction.nii.gz`` are
    loaded and a Dice coefficient is computed per masking function.  The
    scores are written to ``./prediction/brats_scores.csv`` and drawn as a
    box plot (``validation_scores_boxplot.png``), with NaN scores left out of
    the plot.  If ``./training.log`` exists, the training and validation loss
    curves are additionally saved to ``loss_graph.png``.
    """
    header = ("WholeTumor", "TumorCore", "EnhancingTumor")
    masking_functions = (get_whole_tumor_mask, get_tumor_core_mask, get_enhancing_tumor_mask)
    rows = []
    subject_ids = []
    for case_folder in glob.glob("prediction/*"):
        if not os.path.isdir(case_folder):
            continue
        subject_ids.append(os.path.basename(case_folder))
        # ``get_data()`` is deprecated in NiBabel; reading the array proxy is
        # the documented replacement that keeps the stored dtype.
        truth_image = nib.load(os.path.join(case_folder, "truth.nii.gz"))
        truth = np.asanyarray(truth_image.dataobj)
        prediction_image = nib.load(os.path.join(case_folder, "prediction.nii.gz"))
        prediction = np.asanyarray(prediction_image.dataobj)
        rows.append([dice_coefficient(func(truth), func(prediction)) for func in masking_functions])

    df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids)
    df.to_csv("./prediction/brats_scores.csv")

    scores = {}
    for index, score in enumerate(df.columns):
        values = df.values.T[index]
        # Keep only the defined scores for the plot.
        scores[score] = values[~np.isnan(values)]

    plt.boxplot(list(scores.values()), labels=list(scores.keys()))
    plt.ylabel("Dice Coefficient")
    plt.savefig("validation_scores_boxplot.png")
    plt.close()

    if os.path.exists("./training.log"):
        training_df = pd.read_csv("./training.log").set_index('epoch')

        plt.plot(training_df['loss'].values, label='training loss')
        plt.plot(training_df['val_loss'].values, label='validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.xlim((0, len(training_df.index)))
        plt.legend(loc='upper right')
        plt.savefig('loss_graph.png')

Example #17

Source File: plots_utils.py From treeomics with GNU General Public License v3.0

5 votes

def boxplot(filename, patient):
    """
    Create box plot of the mutant allele frequencies in each sample
    :param filename: name of the output file
    :param patient: instance of the class patient; ``sample_names``,
        ``sample_mafs`` and ``sample_phred_coverages`` are read from it
    """

    # Figure width grows with the number of samples.
    bp_fig, bp_ax = plt.subplots(figsize=(len(patient.sample_mafs)*0.56, 4))
    meanpointprops = dict(marker='o', markeredgecolor='black', markersize=4, markerfacecolor='none')

    data = [patient.sample_mafs[sample_name] for sample_name in patient.sample_names]

    plt.boxplot(data, notch=0, showfliers=False, sym='+', vert=1, whis=1.5, meanprops=meanpointprops,
                meanline=False, showmeans=True)

    bp_ax.set_ylim([0, 1.0])
    bp_ax.set_xlabel('Samples')
    bp_ax.set_ylabel('Variant allele frequency')
    bp_ax.set_xticklabels([sa_n.replace('_', ' ') for sa_n in patient.sample_names], rotation=45)

    # Annotate each box with the sample's median coverage; values below 100
    # are printed in red.
    for sa_idx, sample_name in enumerate(patient.sample_names):
        median_coverage = np.median(patient.sample_phred_coverages[sample_name])
        bp_ax.text(sa_idx+1, 0.93, '{}x'.format(median_coverage),
                   horizontalalignment='center', fontsize=9,
                   color=('black' if median_coverage >= 100 else 'red'))

    plt.savefig(filename, dpi=150, bbox_inches='tight', transparent=True)
    plt.close()
    logger.info('Generated boxplot for mutant allele frequencies {}'.format(filename))

Example #18

Source File: plot.py From VerticaPy with Apache License 2.0

5 votes

def boxplot2D(vdf, columns: list = []):
	"""Draw box plots for the numerical columns of a vDataFrame.

	``columns`` defaults to every numerical column of ``vdf``.  Requested
	columns that are not numerical are reported with a warning and ignored;
	the caller's list is left untouched.  A single remaining column is
	delegated to that column's own ``boxplot()``; several columns are drawn
	together from the output of ``vdf.describe``.

	Raises ``RuntimeError`` when no numerical column is left, and
	``Exception`` when building the multi box plot fails.
	"""
	# The default list is only ever read, never mutated.
	numerical_columns = vdf.numcol()
	if not(columns):
		columns = numerical_columns
	# Build a new list instead of removing while iterating, which would skip
	# the element following every removed one.
	selected = []
	for column in columns:
		if column in numerical_columns:
			selected.append(column)
		else:
			print("\u26A0 Warning: The Virtual Column {} is not numerical.\nIt will be ignored.".format(column))
	if not(selected):
		print("\u26A0 Warning: No numerical columns found to draw the multi boxplot")
		# A bare ``raise`` here had no active exception to re-raise; raise the
		# resulting RuntimeError explicitly, with a message.
		raise RuntimeError("No numerical columns found to draw the multi boxplot")
	# SINGLE BOXPLOT
	if (len(selected) == 1):
		vdf[selected[0]].boxplot()
		return
	# MULTI BOXPLOT
	try:
		summarize = vdf.describe(columns = selected).transpose()
		names = [name for name in summarize.values]
		# Entries 3 to 7 of each summary row are used as the box statistics.
		result = [summarize.values[name][3:8] for name in names]
		# The first entry of the transposed summary is not a data column.
		del names[0]
		del result[0]
		plt.figure(figsize = (14, 8) if isnotebook() else (10, 6))
		plt.rcParams['axes.facecolor'] = '#F8F8F8'
		plt.xticks(rotation = 90)
		box = plt.boxplot(result, notch = False, sym = '', whis = float('Inf'), widths = 0.5, labels = names, patch_artist = True)
		plt.title('Multi BoxPlot of the vDataFrame')
		# Leave room below the axes for the longest (rotated) label.
		plt.subplots_adjust(bottom = max(0.3, max(len(str(item)) for item in names) / 90.0))
		colors = gen_colors()
		for median in box['medians']:
			median.set(color = 'black', linewidth = 1,)
		for patch, color in zip(box['boxes'], colors):
			patch.set_facecolor(color)
		plt.show()
	except Exception as e:
		raise Exception("{}\nAn error occured during the BoxPlot creation.".format(e)) from e
#---#

Example #19

Source File: evaluate.py From Keras-Brats-Improved-Unet3d with MIT License

5 votes

def main():
    """Score every case folder under ``prediction/`` and plot the results.

    For each sub-directory, ``truth.nii.gz`` and ``prediction.nii.gz`` are
    loaded and a Dice coefficient is computed per masking function.  The
    scores are written to ``./prediction/brats_scores.csv`` and drawn as a
    box plot (``validation_scores_boxplot.png``), with NaN scores left out of
    the plot.  If ``./training.log`` exists, the training and validation loss
    curves are additionally saved to ``loss_graph.png``.
    """
    header = ("WholeTumor", "TumorCore", "EnhancingTumor")
    masking_functions = (get_whole_tumor_mask, get_tumor_core_mask, get_enhancing_tumor_mask)
    rows = []
    subject_ids = []
    for case_folder in glob.glob("prediction/*"):
        if not os.path.isdir(case_folder):
            continue
        subject_ids.append(os.path.basename(case_folder))
        # ``get_data()`` is deprecated in NiBabel; reading the array proxy is
        # the documented replacement that keeps the stored dtype.
        truth_image = nib.load(os.path.join(case_folder, "truth.nii.gz"))
        truth = np.asanyarray(truth_image.dataobj)
        prediction_image = nib.load(os.path.join(case_folder, "prediction.nii.gz"))
        prediction = np.asanyarray(prediction_image.dataobj)
        rows.append([dice_coefficient(func(truth), func(prediction)) for func in masking_functions])

    df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids)
    df.to_csv("./prediction/brats_scores.csv")

    scores = {}
    for index, score in enumerate(df.columns):
        values = df.values.T[index]
        # Keep only the defined scores for the plot.
        scores[score] = values[~np.isnan(values)]

    plt.boxplot(list(scores.values()), labels=list(scores.keys()))
    plt.ylabel("Dice Coefficient")
    plt.savefig("validation_scores_boxplot.png")
    plt.close()

    if os.path.exists("./training.log"):
        training_df = pd.read_csv("./training.log").set_index('epoch')

        plt.plot(training_df['loss'].values, label='training loss')
        plt.plot(training_df['val_loss'].values, label='validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.xlim((0, len(training_df.index)))
        plt.legend(loc='upper right')
        plt.savefig('loss_graph.png')

Example #20

Source File: plot.py From enlopy with BSD 3-Clause "New" or "Revised" License

5 votes

def plot_boxplot(Load, by='day', **pltargs):
    """Draw one box per weekday or per hour of the day on the current axes.

    Arguments:
        Load (pd.Series): 1D pandas Series with timed index
        by (str): group results by 'day' or 'hour'
        **pltargs (dict): forwarded to :meth:`matplotlib.pyplot.boxplot`
    Returns:
        None (the plot is drawn as a side effect)
    Raises:
        NotImplementedError: if ``by`` is neither 'day' nor 'hour'
    """
    Load = clean_convert(Load,force_timed_index=True)

    if by not in ('day', 'hour'):
        raise NotImplementedError('Only "day" and "hour" are implemented')

    if by == 'day':
        grouped = Load.groupby(Load.index.weekday)
        tick_names = "Mon Tue Wed Thu Fri Sat Sun".split()
    else:
        grouped = Load.groupby(Load.index.hour)
        tick_names = np.arange(0, 24)

    # One data set per group; the group keys themselves are not needed.
    samples = [values for _, values in grouped]
    plt.boxplot(samples, labels=tick_names, **pltargs)
    # TODO : Generalize to return monthly, hourly etc.
    # TODO Is it really needed? pd.boxplot()

Example #21

Source File: relation.py From visualize_ML with MIT License

5 votes

def evaluate_anova(x,y):
    """Run ``f_classif`` on ``x`` against ``y`` and return ``(F_value, pvalue)``."""
    anova_result = f_classif(x, y)
    f_statistic, p_value = anova_result
    return f_statistic, p_value

# In descriptive statistics, a box plot or boxplot is a convenient way of graphically depicting groups of numerical data through their quartiles. Box plots may also have lines extending vertically from the boxes (whiskers) indicating variability outside the upper and lower quartiles, hence the terms box-and-whisker plot and box-and-whisker diagram.
# Quartile: In descriptive statistics, the quartiles of a ranked set of data values are the three points that divide the data set into four equal groups, each group comprising a quarter of the data

Example #22

Source File: relation.py From visualize_ML with MIT License

5 votes

def bivariate_analysis_cont_catg(cont_catg_list,df,target_name,sub_len,COUNTER,PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE):
    """Draw one subplot per column: box plots of the target split by column value.

    Columns are first passed through ``clean_str_list``.  For every remaining
    column a subplot is selected at index ``COUNTER`` in a ``PLOT_ROW_SIZE`` x
    ``PLOT_COLUMNS_SIZE`` grid, titled with the column's mean and std, and
    annotated with the ANOVA F value and p value of the column against the
    target.  Rows containing NaN are dropped first.  ``sub_len`` is accepted
    but not used.

    Returns the ``plt`` module and the updated ``COUNTER``.

    Raises ``ValueError`` when ``clean_str_list`` rejects the target column
    while ``cont_catg_list`` is non-empty.
    """
    clean_cont_catg_list = clean_str_list(df,cont_catg_list)

    if len(clean_str_list(df,[target_name])) == 0 and len(cont_catg_list)>0:
        raise ValueError("You seem to have a target variable with string values.")
    clean_df = df.dropna()

    for col in clean_cont_catg_list:

        col_classes = clean_df[col].unique()

        # describe() output is read by position (1 -> mean, 2 -> std for numeric
        # columns); ``.iloc`` makes that explicit instead of relying on the
        # deprecated positional fallback of ``Series[int]``.
        summary = clean_df[col].describe()
        mean = summary.iloc[1]
        std = summary.iloc[2]

        plt.subplot(PLOT_ROW_SIZE,PLOT_COLUMNS_SIZE,COUNTER)
        plt.title("mean "+str(np.float32(mean))+" std "+str(np.float32(std)),fontsize=10)

        x = [np.array(clean_df[clean_df[col]==i][target_name]) for i in col_classes]
        y = np.float32(clean_df[target_name])

        f_value,p_val = evaluate_anova(np.array(clean_df[col]).reshape(-1,1),y)

        plt.xlabel(col+"\n f_value: "+str(np.float32(f_value[0]))+" / p_val: "+str(p_val[0]), fontsize=10)
        plt.ylabel(target_name, fontsize=10)
        plt.boxplot(x)

        print (col+" vs "+target_name+" plotted....")

        COUNTER +=1

    return plt,COUNTER


# This function is for the bivariate analysis between categorical vs continuous variable. Plots box plots.

Example #23

Source File: test_psi_pi.py From pgmult with MIT License

5 votes

def test_pgm_rvs():
    """Sample from a PGMultinomial and plot the marginals and covariance of pi.

    Prints the mean and variance parameters and the empirical mean and
    variance of the sampled ``pi`` vectors, then shows a box plot of the
    marginals next to an image of their covariance matrix.
    """
    K = 10
    mu, sig = compute_uniform_mean_psi(K, sigma=2)
    # mu = np.zeros(K-1)
    # sig = np.ones(K-1)
    print("mu:  ", mu)
    print("sig: ", sig)

    Sigma = np.diag(sig)

    # Add some covariance
    # Sigma[:5,:5] = 1.0 + 1e-3*np.random.randn(5,5)

    # Sample a bunch of pis and look at the marginals
    pgm = PGMultinomial(K, mu=mu, Sigma=Sigma)
    samples = 10000
    pis = []
    for _ in range(samples):
        pgm.resample()
        pis.append(pgm.pi)
    pis = np.array(pis)

    print("E[pi]:   ", pis.mean(axis=0))
    print("var[pi]: ", pis.var(axis=0))

    plt.figure()
    plt.subplot(121)
    plt.boxplot(pis)
    plt.xlabel("k")
    # Raw strings: "\p" is not a valid escape sequence in a normal string literal.
    plt.ylabel(r"$p(\pi_k)$")

    # Plot the covariance
    cov = np.cov(pis.T)
    plt.subplot(122)
    plt.imshow(cov, interpolation="None", cmap="cool")
    plt.colorbar()
    plt.title(r"Cov($\pi$)")
    plt.show()

Example #24

Source File: summary_boxplots.py From deep500 with BSD 3-Clause "New" or "Revised" License

5 votes

def after_training(self, runner, training_stats: TrainingStatistics):
        """Write box plots of the per-epoch inference and optimization times.

        Two figures are saved, at ``self.path + "_inference_test"`` and
        ``self.path + "_optimizing_train"``.

        Raises ``ValueError`` when the first training summary holds neither
        inference nor optimization timings.
        """
        import matplotlib.pyplot as plt

        train_summaries = training_stats.train_summaries
        if len(train_summaries) > 0:
            first_summary = train_summaries[0]
            no_inference_times = len(first_summary.time_used_inference) == 0
            if no_inference_times and len(first_summary.time_used_optimizing) == 0:
                raise ValueError('To generate box-plots, please train with the '
                                 'Trainer object with collect_all_times=True')

        inference_test_data = [summary.time_used_inference for summary in training_stats.test_summaries]
        optimizing_time_train = [summary.time_used_optimizing for summary in training_stats.train_summaries]

        # (figure title, one list of timings per epoch, output path suffix)
        figures = (
            ('Time used for inference', inference_test_data, "_inference_test"),
            ('Time used for optimization (inference + gradient update)',
             optimizing_time_train, "_optimizing_train"),
        )
        for heading, timings, suffix in figures:
            output_path = self.path + suffix
            plt.figure()
            plt.title(heading)
            plt.xlabel('Epoch')
            plt.ylabel('time used')
            # Positional arguments: notch=1, sym='' (no flier markers).
            plt.boxplot(timings, 1, '')
            plt.savefig(output_path)
            print('Box plot written to: {}.png'.format(output_path))
            plt.close()

Example #25

Source File: analyse_results2.py From YAFS with MIT License

5 votes

def drawBoxPlot_User_App(dr,app):
    """Show a box plot of the ``r`` entries of ``dr`` for one app.

    The rows of ``dr`` whose ``app`` column equals ``app`` are plotted, with
    the matching ``user`` values as x tick labels.
    """
    fig, ax = plt.subplots()
    app_rows = dr[dr.app == app]
    ax.boxplot(app_rows["r"].values)
    #TODO ILP CHANGE POSITION
    ax.set_xticklabels(app_rows["user"].values)
    ax.set_title('App: %i' % app)
    ax.set_xlabel('User')
    ax.set_ylabel('Time Response')
    plt.show()

Example #26

Source File: analyse_results_debug.py From YAFS with MIT License

5 votes

def drawBoxPlot_User_App(dr,app):
    """Show a box plot of the ``r`` entries of ``dr`` for one app.

    The rows of ``dr`` whose ``app`` column equals ``app`` are plotted, with
    the matching ``user`` values as x tick labels.
    """
    fig, ax = plt.subplots()
    app_rows = dr[dr.app == app]
    ax.boxplot(app_rows["r"].values)
    #TODO ILP CHANGE POSITION
    ax.set_xticklabels(app_rows["user"].values)
    ax.set_title('App: %i' % app)
    ax.set_xlabel('User')
    ax.set_ylabel('Time Response')
    plt.show()

Example #27

Source File: analyse_results2.py From YAFS with MIT License

5 votes

def drawBoxPlot_Both_USER_ax(app,dr,drILP,ax):
    """Draw paired per-user box plots for one app on an existing axes.

    ``dr`` and ``drILP`` are filtered on their ``app`` column and the entries
    of their ``r`` column are plotted side by side on ``ax``.  Two empty
    labelled lines ('Partition', 'ILP') are added so the caller can build a
    legend; no legend is created here.

    ``np.arange``/``range`` are used instead of ``xrange`` so the function
    runs on both Python 2 and Python 3.
    """
    data_a = dr[dr.app == app].r.values
    data_b = drILP[drILP.app == app].r.values
    ticks = list(np.sort(dr[dr.app == app].user.unique()))

    # Each pair of boxes shares a slot two units wide; the two series are
    # shifted 0.4 to the left and to the right of the slot centre.
    bpl = ax.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
    bpI = ax.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
    set_box_color(bpl, '#5ab4ac')
    set_box_color(bpI, '#d8b365')

    ax.get_xaxis().set_ticks(range(0, len(ticks) * 2, 2))
    ax.set_xticklabels(ticks)
    ax.set_xlim(-2, len(ticks)*2)
    # Empty lines exist only to provide legend handles in the box colours.
    ax.plot([], c='#5ab4ac', label="Partition")
    ax.plot([], c='#d8b365', label="ILP")

Example #28

Source File: analyse_results2.py From YAFS with MIT License

5 votes

def drawBoxPlot_User_App(dr,app):
    """Show a box plot of the ``r`` entries of ``dr`` for one app.

    The rows of ``dr`` whose ``app`` column equals ``app`` are plotted, with
    the matching ``user`` values as x tick labels.
    """
    fig, ax = plt.subplots()
    app_rows = dr[dr.app == app]
    ax.boxplot(app_rows["r"].values)
    #TODO ILP CHANGE POSITION
    ax.set_xticklabels(app_rows["user"].values)
    ax.set_title('App: %i' % app)
    ax.set_xlabel('User')
    ax.set_ylabel('Time Response')
    plt.show()

Example #29

Source File: prob_vs_cl.py From rgz_rcnn with MIT License

5 votes

def _consensus_level_bin(cl):
    """Return the index (0-3) of the 0.1-wide consensus-level bin containing ``cl``."""
    if 0.6 <= cl < 0.7:
        return 0
    if 0.7 <= cl < 0.8:
        return 1
    if 0.8 <= cl < 0.9:
        return 2
    if 0.9 <= cl <= 1.0:
        return 3
    raise Exception("invalid CL: %.3f" % cl)


def plot_prob_cl_box(prob_cl_mapping_list, plot_outliers=False):
    """Show, per class, box plots of probabilities binned by consensus level.

    ``prob_cl_mapping_list`` maps a class name to a pair whose first element
    is the list of probabilities and whose second element is the list of
    consensus levels.  Classes are drawn in sorted order on a 3 x 2 grid of
    subplots.  Flier markers are drawn only when ``plot_outliers`` is true.

    Raises ``Exception`` for a consensus level outside [0.6, 1.0].
    """
    labels = ['0.6~0.7', '0.7~0.8', '0.8~0.9', '0.9~1.0']
    symb = '+' if plot_outliers else ''
    # sorted() works on Python 2 and 3; ``dict.keys().sort()`` only works on Python 2.
    for i, classname in enumerate(sorted(prob_cl_mapping_list)):
        v = prob_cl_mapping_list[classname]
        prob_list = v[0]
        cl_list = v[1]
        data = [[] for _ in labels]
        for cl, prob in zip(cl_list, prob_list):
            data[_consensus_level_bin(cl)].append(prob)
        ax = plt.subplot(3, 2, i + 1)
        plt.boxplot(data, labels=labels, sym=symb)
        plt.xlabel('Consensus level')
        plt.grid(True, linestyle='-', which='major', color='lightgrey',
               alpha=0.5, axis='y')
        # Only the left-hand column of subplots gets a y-axis label.
        if (i % 2 == 0):
            plt.ylabel('Probability')
        ax.set_title('%s' % classname.replace('_', 'C_') + 'P')
    plt.tight_layout(h_pad=0.0)
    plt.show()

Example #30

Source File: AutoPlot.py From amplicon_sequencing_pipeline with MIT License

5 votes

def plotAlphaDiversities(self, alphaDiversityFile, figure_filename):
        """Read an alpha diversity file and save a box plot of its values.

        The first line of the file is skipped; on every following non-blank
        line the second whitespace-separated field is parsed as a float.
        The plot is written to ``figure_filename``.
        """
        # The file is only needed for reading, so it is closed before plotting.
        with open(alphaDiversityFile,'r') as fid:
            all_lines = fid.readlines()
        # Blank lines (e.g. a trailing newline) are skipped instead of crashing.
        rows = [fields for fields in (line.split() for line in all_lines[1:]) if fields]
        alpha_diversities = [float(fields[1]) for fields in rows]
        figure()
        plt.boxplot(alpha_diversities)
        plt.xlabel('Sample category')
        plt.ylabel('Alpha diversity')
        plt.savefig(figure_filename)