python source code of LSDMap

#=============================================================================
# These functions create figures for visualising the m/n selection using the chi
# mapping tool.
#
# It creates separate plots for each basin in the DEM.
#
# Authors:
#     Simon M. Mudd
#     Fiona J. Clubb
#=============================================================================
#=============================================================================
# IMPORT MODULES
#=============================================================================
# set backend to run on server
import matplotlib
matplotlib.use('Agg')

import numpy as np
import LSDPlottingTools as LSDP
import matplotlib.pyplot as plt
from matplotlib import rcParams
import matplotlib.ticker as ticker
import pandas as pd
from matplotlib import colors
import math
import os
import subprocess
from matplotlib import cm
#from shapely.geometry import Polygon
from LSDMapFigure import PlottingHelpers as Helper
from LSDMapFigure.PlottingRaster import MapFigure
from LSDMapFigure.PlottingRaster import BaseRaster
from LSDPlottingTools import LSDMap_SAPlotting as SA
from LSDPlottingTools import joyplot

#===========================================
# Function to make a figure object
#===========================================
def makefigure(size_format = "EPSL", aspect_ratio=16./9.):
    """
    This function makes a figure object based on the specified size_format
    of specific journals. This could be implemented at a global level.
    Sets RC params for figures

    MDH

    """

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size
    rcParams['text.usetex'] = False

    # make the figure
    if type(size_format) is float:
        fig = plt.figure(1, facecolor='white',figsize=(size_format,size_format/aspect_ratio))
    elif size_format == "geomorphology":
        fig = plt.figure(1, facecolor='white',figsize=(6.25,6.25/aspect_ratio))
    elif size_format == "big":
        fig = plt.figure(1, facecolor='white',figsize=(16,16./aspect_ratio))
    elif size_format == "EPSL":
        fig = plt.figure(1, facecolor='white',figsize=(7.48,7.48/aspect_ratio))
    else:
        fig = plt.figure(1, facecolor='white',figsize=(4.92126,4.92126/aspect_ratio))

    return fig

#=============================================================================
#=============================================================================
# ANALYSIS FUNCTIONS
# These functions analyse the data (for example, doing the outlier checking)
#=============================================================================
def SimpleMaxMLECheck(BasinDF):
    """
    This function checks through the basin dataframe and returns a dict with the basin key and the best fit
    m/n

    Args:
        BasinDF: pandas dataframe from the basin csv file.

    Returns:
        m_over_n_dict: dictionary with the best fit m/n for each basin, the key is the basin key
        and the value is the best fit m/n

    Author: FJC
    """
    # read in the basin csv
    basin_keys = list(BasinDF['basin_key'])
    #print BasinDF

    # remove the first 2 columns from DF
    del BasinDF['basin_key']
    del BasinDF['outlet_jn']

    # now find the index and value of the max MLE in each row
    MOverNs = list(BasinDF.idxmax(axis=1))
    MOverNs = [float(x.split()[-1]) for x in MOverNs]
    print ("MAX MOVERNS")
    print (MOverNs)

    # zip into a dictionary
    MOverNDict = dict(zip(basin_keys, MOverNs))
    return MOverNDict

def GetMOverNRangeMCPoints(BasinDF, start_movern=0.2, d_movern=0.1, n_movern=7):
    """
    This function checks through the MC points basin dataframe and returns the best fit and
    range of m/n values. The range is given as a list in the column (all values of m/n where
    the 3rd quartile is above the first quartile value for the best fit m/n)

    Args:
        BasinDF: pandas dataframe from the basin MC points csv file.
        start_movern (float): starting m/n value
        d_movern (float): step in m/n value
        n_movern (float): number of m/n values

    Returns:
        dataframe with the basin key, best fit m/n, and range of m/n for each basin

    Author: FJC
    """
    from scipy import stats

    # get the medians from the dataframe
    MedianDF = BasinDF.filter(regex='median')

    # find the median with the highest MLE for each basin
    MaxMedians = list(MedianDF.idxmax(axis=1))
    MaxMedians = [str(x) for x in MaxMedians]
    Median_MOverNs = []
    for median in MaxMedians:
        if "=" in median:
            Median_MOverNs.append(float(median.split("=")[-1]))
        else:
            Median_MOverNs.append(np.nan)
    #Median_MOverNs = [float(x.split("=")[-1]) for x in MaxMedians]

    # now find the first quartile that corresponds to this median
    FirstQDF = BasinDF.filter(regex='FQ')
    FirstQF_MLEs = []
    for i, median in enumerate(Median_MOverNs):
        if not np.isnan(median):
            # find the right column
            this_col = "FQ_MLE_m_over_n="+str(median)
            FirstQDF_mask = FirstQDF[this_col]
            FirstQF_MLEs.append(float(FirstQDF_mask.iloc[i]))
        else:
            FirstQF_MLEs.append(np.nan)

    # now, for each basin, find the columns in the 3rd quartile which are higher than the first Q MLE
    ThirdQDF = BasinDF.filter(regex='TQ')

    # change the column names to just have the m/n values
    column_names = list(ThirdQDF)
    column_names = [x.split("=")[-1] for x in column_names]
    ThirdQDF.columns = column_names

    # add the threshold first Q MLEs to the dataframe
    ThirdQDF['threshold'] = pd.Series(FirstQF_MLEs, index=ThirdQDF.index)
    print (ThirdQDF)
    # change DF to a boolean where values are greater than the threshold
    TempDF = ThirdQDF.drop('threshold', 1).gt(ThirdQDF['threshold'], 0)
    # get the column names where the values are greater than the threshold for each basin
    TempDF['Range_MOverNs'] = TempDF.apply(lambda x: ','.join(x.index[x]),axis=1)

    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)

    # get the m/n values greater than the threshold to a list, then get the min and max
    Min_MOverNs = []
    Max_MOverNs = []
    Range_MOverNs = list(TempDF['Range_MOverNs'])
    for i in range (len(Range_MOverNs)):
        movern_str = Range_MOverNs[i].split(",")
        if '' in movern_str:
            Min_MOverNs.append(np.nan)
            Max_MOverNs.append(np.nan)
        else:
            movern_floats = [float(x) for x in movern_str]
            # we need to linearly interpolate between the nearest m/ns for the min
            # and the max.
            Min_MOverN = min(movern_floats)
            Max_MOverN = max(movern_floats)
            
            

            # ok, for the minimum, get the previous m/n value
            if float(Min_MOverN) - float(d_movern) < float(start_movern):
                new_min_movern = Min_MOverN
            else:
                print("The minimum theta is: "+str(Min_MOverN))
                this_key = round((float(Min_MOverN)-float(d_movern)),4)
                print("The other theta is: "+str(this_key))
                movern_list = [float(Min_MOverN)-float(d_movern), Min_MOverN]
                mle_list = [ThirdQDF[str(this_key)][i],ThirdQDF[str(float(Min_MOverN))][i]]
                slope, intercept, r_value, p_value, std_err = stats.linregress(movern_list, mle_list)
                new_min_movern = (ThirdQDF['threshold'][i] - intercept)/slope

            # for the maximum get the next m/n value
            if float(Max_MOverN) + float(d_movern) > float(end_movern):
                new_max_movern = Max_MOverN
            else:
                movern_list = [Max_MOverN, float(Max_MOverN)+float(d_movern)]

                # Alright, In some apparently random cases depending on computer architecture, float to string conversion can be F***** up.
                # Here is a way to deal with it: it first try the regular code that works for many cases
                # And only correct the string if needed.
                # Slightly hacky, it might not work in the extremely rare cases where you try to constrain you concavity at 0.001 precision AND you have the wrong cpu or motherboard or whatever decides to convert 0.75 to 0.7500000000000004.
                try:
                    # Regular case
                    mle_list = [ThirdQDF[str(Max_MOverN)][i], ThirdQDF[(str(float(Max_MOverN)+float(d_movern)))][i]]
                except KeyError:
                    # DID NOT WORK, fixing
                    # DEaling with annoying string issue. No time to look for clean solution so here is a hacky way:
                    str_max_movern = "%0.2f" % Max_MOverN #  str(Max_MOverN)# converting
                    dtrdiff ="%0.2f" % (float(Max_MOverN)+float(d_movern)) # str(float(Max_MOverN)+float(d_movern))
                    # # if Conversion failed it screws the string into domething like 0.300000000000000004 instead of 0.3
                    # if(len(str_max_movern)>4):
                    #     #Fixing the extra 0
                    #     str_max_movern = str_max_movern[0:4]
                    # # Code won't work if 0.70000000000004 is converted to 0.70 and need in these case to be reconverted to 0.7 AAAAAAAAAAAAAAAAAAAAAAAAAAAH
                    if(str_max_movern[-1] == "0"):
                        str_max_movern = str_max_movern[:-1]

                    # # Same procedure for string 2, like EXACTLY the same
                    # dtrdiff = str(float(Max_MOverN)+float(d_movern))
                    # if(len(dtrdiff)>4):
                    #     #Fixing the extra 0
                    #     dtrdiff = dtrdiff[0:4]
                    if(dtrdiff[-1] == "0"):
                        dtrdiff = dtrdiff[:-1]

                    # # Done. Correcting with the correct concavity.
                    # try:
                    mle_list = [ThirdQDF[str_max_movern][i], ThirdQDF[(dtrdiff)][i]]
                    # except KeyError:

                # Out of the bug area
                slope, intercept, r_value, p_value, std_err = stats.linregress(movern_list, mle_list)
                new_max_movern = (ThirdQDF['threshold'][i] - intercept)/slope

            Min_MOverNs.append(new_min_movern)
            Max_MOverNs.append(new_max_movern)


    # write the output dataframe
    OutputDF = pd.DataFrame()
    OutputDF['basin_key'] = BasinDF['basin_key']
    OutputDF['Median_MOverNs'] = pd.Series(Median_MOverNs)
    OutputDF['FirstQ_threshold'] = pd.Series(FirstQF_MLEs)
    OutputDF['Min_MOverNs'] = pd.Series(Min_MOverNs)
    OutputDF['Max_MOverNs'] = pd.Series(Max_MOverNs)

    return OutputDF

def GetRangeMOverNChiResiduals(DataDirectory, fname_prefix, basin_list=[0], parallel=False):
    """
    This function reads in the CSV files with the chi residuals data and
    calculates the median best fit m/n along with the min and max from the
    first and third quartiles. These are returned as a pandas dataframe because
    I love pandas <3 <3
    SMM Note: this isn't really used since the residual analysis doesn't work. 

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.

    Returns:
        pandas dataframe with m/n data from the chi residuals analysis

    Author: FJC
    """
    # first let's read in the csv files with the residuals data
    if not parallel:
        dfs = Helper.ReadChiResidualsCSVs(DataDirectory,fname_prefix)
    else:
        dfs = Helper.AppendChiResidualsCSVs(DataDirectory,fname_prefix)

    # get the best fit m/n from the dataframes
    movern_data = []
    for i in range(len(dfs)):
        dfs[i] = dfs[i][dfs[i]['basin_key'].isin(basin_list)]
        ThisDF = dfs[i][dfs[i] < 0]

        indices = []
        for i, row in ThisDF.iterrows():
            # find the first index where it is not a nan
            index = row.first_valid_index()
            if index == None:
                index = row.index[-1]
            indices.append(float(index.split("=")[-1]))
        movern_data.append(indices)

    # the movern_data is a list of lists containing the information.
    # movern_data[0] = median
    # movern_data[1] = first quartile
    # movern_data[2] = third quartile
    # the second index is the basin key.
    # so to get the median of basin 3 you would use movern_data[0][3]

    # write the output dataframe
    OutputDF = pd.DataFrame()
    OutputDF['basin_key'] = dfs[0]['basin_key']
    OutputDF['Median_MOverNs'] = pd.Series(movern_data[0])
    OutputDF['FirstQ_MOverNs'] = pd.Series(movern_data[1])
    OutputDF['ThirdQ_MOverNs'] = pd.Series(movern_data[2])

    return OutputDF

def GetBestFitMOverNFromDisorder(BasinDF):
    """
    This function looks in the basin DF from the disorder CSV files
    and returns a dict with the basin ID and the best-fit m/n for each one.
    The best fit m/n is the one with the minimum disorder statistic.

    Args:
        BasinDF: basin disorder dataframe

    Returns: dict with the best fit m/n for each basin, key is the basin ID and value is the best fit m/n

    Author: FJC
    """
    # read in the basin csv
    basin_keys = list(BasinDF['basin_key'])
    #print BasinDF

    # remove the first 2 columns from DF
    del BasinDF['basin_key']
    del BasinDF['outlet_jn']

    # now find the index and value of the max MLE in each row
    MOverNs = list(BasinDF.idxmin(axis=1))
    MOverNs = [float(x.split()[-1]) for x in MOverNs]
    print ("Best-fit m/ns disorder")
    print (MOverNs)

    # zip into a dictionary
    MOverNDict = dict(zip(basin_keys, MOverNs))
    return MOverNDict


def CompareChiAndSAMOverN(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7):
    """
    This function compiles

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Author: SMM
    """


    from LSDPlottingTools import LSDMap_PointTools as PointTools
    from LSDPlottingTools import LSDMap_SAPlotting as SAPlot

    # read in binned SA data
    binned_csv_fname = DataDirectory+fname_prefix+'_SAbinned.csv'
    print("I'm reading in the csv file "+binned_csv_fname)
    binnedPointData = PointTools.LSDMap_PointData(binned_csv_fname)

    # get the basin keys and check if the basins in the basin list exist
    basin = binnedPointData.QueryData('basin_key')
    basin = [int(x) for x in basin]
    Basin = np.asarray(basin)
    these_basin_keys = np.unique(Basin)

    print("The unique basin keys are: ")
    print(these_basin_keys)

    final_basin_keys = []
    # A bit of logic for checking keys
    if (len(basin_list) == 0):
        final_basin_keys = these_basin_keys
    else:
        for basin in basin_list:
            if basin not in these_basin_keys:
                print("You were looking for basin "+str(basin)+ " but it isn't in the basin keys.")
            else:
                final_basin_keys.append(basin)

    print("The final basin keys are:")
    print(final_basin_keys)

    # Now get all the m/n values from the basin list
    SA_movern_dict = SAPlot.BinnedRegressionDriver(DataDirectory, fname_prefix,
                                                   basin_keys = final_basin_keys)

    # Now do the same with the chi-derived m/n data
    (Outlier_counter, removed_sources_dict, best_fit_movern_dict, MLEs_dict) = CheckMLEOutliers(DataDirectory, fname_prefix, basin_list=final_basin_keys,
                                 start_movern=start_movern, d_movern=d_movern,
                                 n_movern=n_movern)

    # Now print to files
    print("The best fit m.n for chi analysis are: ")
    print(best_fit_movern_dict)

    # Now plot the figure
    PlotMOverNDicts(DataDirectory, fname_prefix, SA_movern_dict,best_fit_movern_dict, FigFormat = "png", size_format = "ESURF")

def CompareMOverNEstimatesAllMethods(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7, parallel=False, Chi_disorder=False):
    """
    This function reads in all the files with the data for the various methods of estimating
    the best fit m/n and produces a summary csv file which has the best fit m/n and uncertainty
    for each basin. This csv can then be used to plot the data.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        Chi_disorder (bool): If true, will include the chi disorder stats

    Returns:
        writes a csv with the best fit m/n info for each basin

    Author: FJC
    """
    # check if a directory exists for the summary plots. If not then make it.
    summary_directory = DataDirectory+'summary_plots/'
    if not os.path.isdir(summary_directory):
        os.makedirs(summary_directory)
        
    # First, we need to check if the full chi actually is there
    Fname_bootstrap_stats = DataDirectory+fname_prefix+'_movernstats_basinstats.csv'
    from pathlib import Path

    bootstrap_exists = False
    my_file = Path(Fname_bootstrap_stats)
    if my_file.is_file():
        bootstrap_exists = True
        print("The bootstrap data exists. I hope it was worth the wait. ")
    else:
        print("I did not find bootstrap data, so I am assuming only S-A data and disorder data.")
        
    

    # read in the full chi dataframe
    if not parallel:
        FullChiBasinDF = Helper.ReadBasinStatsCSV(DataDirectory,fname_prefix)
    else:
        FullChiBasinDF = Helper.AppendBasinCSVs(DataDirectory,fname_prefix)

    # Let's get the list of basins
    if basin_list == []:
        print("You didn't supply a list of basins, so I'm going to analyse all of them.")
        basin_list = list(FullChiBasinDF['basin_key'])

    # We're going to write our summary csv using pandas, woooo
    # First of all we need to set up the dataframe
    OutDF = pd.DataFrame()
    OutDF['basin_key'] = pd.Series(basin_list)

    # get the best fit m/n for each basin in the list from the full chi method
    if bootstrap_exists:
        FullChiMOverNDict = SimpleMaxMLECheck(FullChiBasinDF)
        OutDF['Chi_MLE_full'] = OutDF['basin_key'].map(FullChiMOverNDict)

    # get the best fit m/n from the points method
    if bootstrap_exists:
        if not parallel:
            PointsChiBasinDF = Helper.ReadMCPointsCSV(DataDirectory,fname_prefix)
        else:
            PointsChiBasinDF = Helper.AppendBasinPointCSVs(DataDirectory,fname_prefix)
        PointsChiBasinDF = PointsChiBasinDF[PointsChiBasinDF['basin_key'].isin(basin_list)]

        UncertaintyDF = GetMOverNRangeMCPoints(PointsChiBasinDF, start_movern, d_movern, n_movern)
        OutDF['Chi_MLE_points'] = UncertaintyDF['Median_MOverNs']
        OutDF['Chi_MLE_points_min'] = UncertaintyDF['Min_MOverNs']
        OutDF['Chi_MLE_points_max'] = UncertaintyDF['Max_MOverNs']

    print ("Getting the m/n from the SA data")

    # Updated on the 21/01/2019 by Boris: I added error management on that part. 
    # Cedric Roerig from Giessen is having issue with plotting routines and the code complains data is empty raising ValueError
    # This should sort it
    # Data is replaced by zeros for SA
    # Let me know if any issue appears because of that
    # B.G.

    # get the best fit m/n from the raw SA data
    try:
        RawSADF = SA.LinearRegressionRawData(DataDirectory,fname_prefix,basin_list)
        OutDF['SA_raw'] = RawSADF['regression_slope']
        OutDF['SA_raw_sterr'] = RawSADF['std_err']
        OutDF['SA_raw_R2'] = RawSADF['R2']
        OutDF['SA_raw_p'] = RawSADF['p_value']
    except ValueError:
        print("Your Slope-area data is somehow empty or not conform... I am skipping it and data will be 0")
        OutDF['SA_raw'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_raw_sterr'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_raw_R2'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_raw_p'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)

    # get the SA tributary information
    try:
        SATribsDF = SA.GetRangeMOverNRawDataByChannel(DataDirectory,fname_prefix,basin_list)
        OutDF['SA_tribs'] = SATribsDF['median_movern']
        OutDF['SA_tribs_min'] = SATribsDF['FirstQ_movern']
        OutDF['SA_tribs_max'] = SATribsDF['ThirdQ_movern']
    except ValueError:
        print("Your Slope-area data is somehow empty or not conform... I am skipping it and data will be 0")
        OutDF['SA_tribs'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_tribs_min'] =pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_tribs_max'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)


    # get the best fit m/n from the segmented SA data
    try:
        SASegmentedDF = SA.GetRangeMOverNSegmentedData(DataDirectory,fname_prefix,basin_list)
        OutDF['SA_segments'] = SASegmentedDF['median_movern']
        OutDF['SA_segments_min'] = SASegmentedDF['FirstQ_movern']
        OutDF['SA_segments_max'] = SASegmentedDF['ThirdQ_movern']
    except ValueError:
        print("Your Slope-area data is somehow empty or not conform... I am skipping it and data will be 0")
        OutDF['SA_segments'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_segments_min'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)
        OutDF['SA_segments_max'] = pd.Series(data = np.zeros(OutDF.shape[0]), index = OutDF.index)

    if Chi_disorder:
        if not parallel:
            DisorderDF = Helper.ReadDisorderUncertCSV(DataDirectory,fname_prefix)
        else:
            DisorderDF = Helper.AppendDisorderCSV(DataDirectory,fname_prefix)
        OutDF['Chi_disorder'] = DisorderDF['median']
        OutDF['Chi_disorder_min'] = DisorderDF['first_quartile']
        OutDF['Chi_disorder_max'] = DisorderDF['third_quartile']

    # print the SA segment data
    OutSAname = "_SA_segment_summary.csv"
    out_sa_name = summary_directory+fname_prefix+OutSAname
    SASegmentedDF.to_csv(out_sa_name,index=False)

    # now write the output dataframe to a csv file
    OutCSVname = "_movern_summary.csv"
    outname = summary_directory+fname_prefix+OutCSVname
    OutDF.to_csv(outname,index=False)


def CheckMLEOutliers(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7, parallel=False):
    """
    This function uses the fullstats files to search for outliers in the
    channels. It loops through m/n values and for each m/n value calculates which
    tributaries have outlying MLE and RMSE values. Each time a tributary has an outlying value,
    an outlier counter is incremented so that for each basin we end up with the number
    of times each of its tributaries has been selected as an outlier. This outlier counter is
    returned. From this outlier counter, the tributaries with outliers are ranked in
    descending order of outlier counts. These are iteratively removed. So for example,
    if one tributary has 4 outlier counts and another has 2, the first trib is removed,
    and MLE is recalculated, and then the second is also removed and MLE is recalculated again.
    The removed tributaries and the order in which they are removed is contained within
    the dictionary removed_sources_dict. Finally, a dictionary with the m/n value
    for each basin with the lowest MLE is returned. Each basin has a list where the first
    element is the MLE with no tributaries removed, the second with the first outlier
    count removed, etc. Note that if two tributaries ahve the same outlier counts
    then these are removed at the same time. The key into the dictionaries are the basin numbers.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Returns:
        Outlier_counter (dict): This is a dictionary where the key is the basin
        number and the values are lists that have the cumulative number of times
        each tributary in the basin is detected to be an outlier.
        removed_sources_dict (dict): This is a dictionary where the key is the basin
        number and the values are lists of lists where the top level list is the steps of removing channels
        and the second level are the channels themselves. The reason there are nested
        lists and not individual channels being removed is because sometimes two
        channels have the same outlier counts and so they are removed at the same time.
        best_fit_movern_dict (dict): This is a dictionary where the key is the basin
        number and the values lists of the m/n ratio that has the lowest MLE. These are lists beacuse
        each element in the list represents the number of outlying tributaries removed.
        The first element is where no tributaries are removed.
        MLEs_dict (dictionary of arrays): The key is the basin number and the value is an
        array containing all the MLE values for each m/n and
        each iterated removed tributary.
        To get the MLEs of, say, the first removal for the 3rd basin you would use MLEs_dict[2][:,1]

    Author: SMM
    """

    # Get a vector of the m over n values
    print("start theta is: "+str(start_movern))
    print("d theta is: "+str(d_movern))
    print("n theta is: "+str(n_movern))
    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)
    print("end theta is: "+str(end_movern))                                                  
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)
    movern_strs = []
    for movern in m_over_n_values:
        movern_str = '%.2f'%movern
        if movern_str.endswith('0'):
            movern_str = movern_str[:-1]
            movern_strs.append(movern_str)

    # we open the first file just so that we can get a counter list
    print ("PARALLEL = ", parallel)
    if not parallel:
        FirstDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,movern_strs[0])
    else:
        FirstDF = Helper.AppendFullStatsCSVs(DataDirectory,movern_strs[0],fname_prefix)

    # get the number of basins
    basin_keys = list(FirstDF['basin_key'])
    basin_keys = [float(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys
        basin_set = set(basin_list)
        basin_list = list(basin_set)
        basin_list = [int(i) for i in basin_list]

    # make a data object that will hold the counters
    Outlier_counter = {}
    # loop through the basins
    for basin in basin_list:
        # mask the data so you only get the correct basin
        FirstDF_basin = FirstDF[FirstDF['basin_key'] == basin]

        trib_values = list(FirstDF_basin['test_source_key'])
        n_nodes = len(trib_values)
        # make the counter with zeros
        this_counter = np.zeros(n_nodes)
        Outlier_counter[basin] = this_counter

    # Now we loop through all the files, calculating the outliers
    for m_over_n in movern_strs:

        print("This_m_over_n is: "+m_over_n)

        #load the file
        if not parallel:
            FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,m_over_n)
        else:
            FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,m_over_n,fname_prefix)


        # loop through the basins
        for basin in basin_list:

            # mask the data so you only get the correct basin
            FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin]

            # extract the relevant data
            MLE_values = list(FullStatsDF_basin['MLE'])
            RMSE_values = list(FullStatsDF_basin['RMSE'])
            trib_values = list(FullStatsDF_basin['test_source_key'])
            print ('N values: '+str(len(MLE_values))+' '+str(len(RMSE_values))+' '+str(len(trib_values)))

            # now get the outliers
            MLE_array = np.asarray(MLE_values)
            RMSE_array = np.asarray(RMSE_values)

            # Get the outliers using the MAD-based outlier function
            RMSE_outliers = LSDP.lsdstatsutilities.is_outlier(RMSE_array)
            MLE_outliers = LSDP.lsdstatsutilities.is_outlier(RMSE_array)

            # now check each of the outlier arrays to see if e need to flip the array
            RMSE_index_min = np.argmin(RMSE_array)
            #RMSE_min = np.min(RMSE_array)

            # if the max MLE is an outlier, flip the outlier vector
            if (RMSE_outliers[RMSE_index_min]):
                RMSE_outliers = [not i for i in RMSE_outliers]

            MLE_index_max = np.argmax(MLE_array)

            # if the max MLE is an outlier, flip the outlier vector
            if (MLE_outliers[MLE_index_max]):
                MLE_outliers = [not i for i in MLE_outliers]

            # turn the outliers vector into an integer
            int_Outlier = [int(i) for i in MLE_outliers]
            #print("Integer outliers are: ")
            #print(int_Outlier)

            # add this outlier counter to the outlier dict
            Outlier_counter[basin] = Outlier_counter[basin]+int_Outlier

    # Now try to calculate MLE by removing outliers

    # Set up dicts where the keys are the basin numbers and the
    # elements tell the m/n values of the best fit MLE, with each element
    # representing incrementally removed tributaries
    # The removed_sources_dict refers to the sources being removed each step
    best_fit_movern_dict = {}
    removed_sources_dict = {}
    MLEs_dict = {}
    for basin_number in basin_list:
        remove_list_index,movern_of_max_MLE,MLEs = Iteratively_recalculate_MLE_removing_outliers_for_basin(Outlier_counter,
                                                                                DataDirectory,
                                                                                fname_prefix,
                                                                                basin_number,
                                                                                start_movern,
                                                                                d_movern,
                                                                                n_movern, parallel)
        best_fit_movern_dict[basin_number] = movern_of_max_MLE
        removed_sources_dict[basin_number] = remove_list_index
        MLEs_dict[basin_number] = MLEs

    print("Here are the vitalstatisix, chief: ")
    print(best_fit_movern_dict)

    return Outlier_counter, removed_sources_dict, best_fit_movern_dict, MLEs_dict

def Iteratively_recalculate_MLE_removing_outliers_for_basin(Outlier_counter, DataDirectory, fname_prefix, basin_number, start_movern=0.2, d_movern=0.1, n_movern=7, parallel=False):
    """
    This function drives the calculations for removing outliers incrementally
    from the MLE calculations. This is specific to a basin.
    It calls functions for specific m/n values

    Args:
        Outlier_counter (dict): The dictionary containing the outlier lists for each basin
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_number (list): a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Returns:
        remove_list_index (list of list): This is the sequence of tributaries that will be removed
        movern_of_max_MLE (list): The m over n values of the maximum MLE sequentially for removed tributaries
        MLEs (array): This is an array containing all the MLE values for each m/n and
        each iterated removed tributary. To get the MLEs of, say, the first removal you would use MLEs[:,1]

    Author: SMM
    """

    # get the outlier counter for this basin
    thisBasinOutlierCounter = Outlier_counter[basin_number]

    print("I am going to recalculate MLE for basin: "+str(basin_number))
    #print("The counter list is:")
    #print(thisBasinOutlierCounter)

    # Get the sroted version and the indices into the sorted version
    sort_index = np.argsort(thisBasinOutlierCounter)
    sorted_outliers = np.sort(thisBasinOutlierCounter)

    # make sure the sourted outliers are ints
    int_sorted_outliers = [int(i) for i in sorted_outliers]

    # we need to reverse these lists so that the biggest outlier counts come first
    sort_index = sort_index[::-1]
    sorted_outliers = sorted_outliers[::-1]

    # get all the duplicates, with the total number of duplicates for each counter
    # This uses the unbelievably handy collections.Counter tool
    from collections import Counter
    all_counter_dict=Counter(int_sorted_outliers)

    # pop out the zero duplicates: we don't exclude non-outlier data
    all_counter_dict.pop(0, None)

    # now we need to iteratively remove the offending counters.
    remove_list = []
    remove_list_index = []
    last_count = -1

    # Only enter the loop if there are positive counts
    if all_counter_dict != 0:

        # Now loop through the sorted outlier count
        for idx,sorted_outlier_count in enumerate(sorted_outliers):

            # first check if this has more than one count
            if sorted_outlier_count == 0:
                all_removed = True
            else:
                # if this is the first element, add it to the remove list
                #if len(remove_list) == 0:
                #    remove_list.append([])
                #    remove_list_index.append([])

                # get the number of counts
                #n_counts = all_counter_dict[sorted_outlier_count]

                # either append the count and index to the current count
                # or make a new list for the next count
                if sorted_outlier_count != last_count:
                    remove_list.append([])
                    remove_list_index.append([])

                remove_list[-1].append(int(sorted_outlier_count))
                remove_list_index[-1].append(sort_index[idx])

                last_count = sorted_outlier_count

    # now print out the lists
    #print("remove list is: ")
    #print(remove_list)

    #print("and index of this list is: ")
    #print(remove_list_index)

    # now we loop through m over n, incrementally removing the tributaries
    # this is done by copying the MLE vector and then replacing
    # the offending channels with an MLE of 1
    # Get a vector of the m over n values
    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # Now get the movern values
    movern_of_max_MLE, MLEs = Calculate_movern_after_iteratively_removing_outliers(m_over_n_values,
                                                                             DataDirectory,
                                                                             fname_prefix,
                                                                             basin_number,
                                                                             remove_list_index, parallel)

    # Returns the remove_list_index, which is a list where each element
    # is a list of tributaries removed in an iteration,
    # And the maximum MLE values in each iteration.
    return remove_list_index,movern_of_max_MLE, MLEs



def Calculate_movern_after_iteratively_removing_outliers(movern_list, DataDirectory,
                                                         fname_prefix, basin_number,
                                                         remove_list_index, parallel=False):
    """
    This function takes the remove list index, which contains information about
    the sequence of tributaries to be removed, and then recalculates MLE by incrementally
    removing tributaries.

    Args:
        movern_list (float): m/n value.
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_number (int): The basin you want
        remove_list_index (list of lists): This contains information about what tributaries to remove

    Returns:
        movern_of_max_MLE (list): A list containing the m/n values of the basin after outlying
        tributaries have been removed. Each element in the list represents an increment of
        tributary removed. The first element is with no tributaries removed.
        MLEs (array): This is an array containing all the MLE values for each m/n and
        each iterated removed tributary. To get the MLEs of, say, the first removal you would use MLEs[:,1]

    Author: SMM
    """
    # Loop through m over n values and recalculate MLE values after removing
    # the outlying data
    All_MLE = []
    for m_over_n in movern_list:
        movern_str = '%.2f'%m_over_n     # need to convert to a string here so files are read correctly. FJC 16/02/18
        if movern_str.endswith('0'):
            movern_str = movern_str[:-1]
    #    print movern_str

        MLE_vals = RecalculateTotalMLEWithRemoveList(DataDirectory, fname_prefix,
                                                     movern_str,basin_number, remove_list_index, parallel)
        All_MLE.append(MLE_vals)


    MLEs = np.asarray(All_MLE)
    index_of_maximums = np.argmax(MLEs,0)

    movern_of_max_MLE = []
    for index in index_of_maximums:
        movern_of_max_MLE.append(movern_list[index])

    # This is required because linspace gives floating point errors
    movern_of_max_MLE = np.around(movern_of_max_MLE,4)

    print("The MLEs for no removal are: ")
    print(MLEs[:,0])

    # Return the m/n ratio with the biggest MLE, but also the array
    # with all the MLE values
    return movern_of_max_MLE, MLEs

def RecalculateTotalMLEWithRemoveList(DataDirectory, fname_prefix,
                                      movern,basin_number, remove_list_index, parallel=False):
    """
    This function takes the remove list index and then recalculates MLE by incrementally
    removing tributaries

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        movern (float): m/n value.
        basin_number (int): The basin you want
        remove_list_index (list of lists): This contains information about what tributaries to remove

    Returns:
        MLE_vals (list): The MLE data with incrementally removed tributaries

    Author: SMM
    """
    #load the file
    if not parallel:
        FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,movern)
    else:
        FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,movern,fname_prefix)

    # mask the data so you only get the correct basin
    FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin_number]

    # extract the relevant data
    MLE_values = list(FullStatsDF_basin['MLE'])
    #trib_values = list(FullStatsDF_basin['test_source_key'])

    # get the MLE_values as an array
    MLE_values = np.asarray(MLE_values)

    #print("The total MLE is: ")
    #print(np.prod(MLE_values))

    MLE_vals = []
    MLE_vals.append(np.prod(MLE_values))

    # now loop through the remove list
    remove_list = []
    for stuff_to_remove in remove_list_index:
        this_MLE = MLE_values

        # need to extend the remove list with these tribs
        remove_list.extend(stuff_to_remove)

        # now set the MLE of the removed tribs to 1
        for idx in remove_list:
            this_MLE[idx] = 1

        #print("\n REMOVING; The total MLE is: ")
        #print(np.prod(this_MLE))
        MLE_vals.append(np.prod(this_MLE))

    return MLE_vals

#=============================================================================
# PLOTTING FUNCTIONS
# Make plots of the m/n analysis
#=============================================================================

def MakePlotsWithMLEStats(DataDirectory, fname_prefix, basin_list = [0],
                  start_movern = 0.2, d_movern = 0.1, n_movern = 7, parallel=False):
    """
    This function makes a chi-elevation plot for each basin and each value of
    m/n and prints the MLE value between the tributaries and the main stem.
    The plot with the maximum value of MLE is highlighted in red (suggesting
    that this should be the appropriate m/n value for this basin). Channels
    are coloured by elevation.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Returns:
        Plot of each m/n value for each basin.

    Author: SMM, modified by FJC
    """

    # check if a directory exists for the chi plots. If not then make it.
    MLE_directory = DataDirectory+'basic_chi_plots/'
    if not os.path.isdir(MLE_directory):
        os.makedirs(MLE_directory)

    profile_suffix = "_movern.csv"
    basin_stats_suffix = "_movernstats_basinstats.csv"

    movern_profile_file = fname_prefix+profile_suffix
    movern_basin_stats_file = fname_prefix+basin_stats_suffix

    end_movern = start_movern+d_movern*(n_movern-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # get the maximum MLE of each basin
    if not parallel:
        pd_DF = Helper.ReadBasinStatsCSV(DataDirectory, fname_prefix)
    else:
        pd_DF = Helper.AppendBasinStatsCSVs(DataDirectory,fname_prefix)

    shp = pd_DF.shape
    max_MLEs = []
    max_MLEs_index = []
    for i in range(0,shp[0]):
        #print("I is: "+str(i))
        a = pd_DF.loc[[i]]
        b = np.asarray(a)
        c = b[0,2:]
        max_MLEs.append(max(c))
        max_MLEs_index.append(np.argmax(c))
    m_over_n_of_max = []

    for idx in max_MLEs_index:
        m_over_n_of_max.append(m_over_n_values[idx])

    print("The m over n of these max are: ")
    print(m_over_n_of_max)

    n_basins = len(max_MLEs)

    label_size = 12

    # Set up fonts for plots
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size
    size_format = "default"

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=1.0,top=1.0)
    ax = fig.add_subplot(gs[5:100,10:95])

    # load the m_over_n data file
    thisPointData = LSDP.LSDMap_PointData(DataDirectory,movern_profile_file,parallel=parallel)
    allBasinStatsData = LSDP.LSDMap_PointData(DataDirectory+movern_basin_stats_file)

    print("m over n values are: ")
    print(m_over_n_values)

    mn_legends = []
    for mn in m_over_n_values:
        mn_legends.append("m_over_n = "+str(round(mn,4)))

    print("The mn labels are: ")
    print(mn_legends)

    # get the data form the profiles
    elevation = thisPointData.QueryData('elevation')
    elevation = [float(x) for x in elevation]
    basin = thisPointData.QueryData('basin_key')
    basin = [int(x) for x in basin]
    source = thisPointData.QueryData('source_key')
    source = [int(x) for x in source]

    # get the basin keys
    allstats_basinkeys = allBasinStatsData.QueryData("basin_key")
    allstats_basinkeys = [int(x) for x in allstats_basinkeys]

    # need to convert everything into arrays so we can mask different basins
    Elevation = np.asarray(elevation)
    Basin = np.asarray(basin)
    #Source = np.asarray(source)

    # Loop through m/n values aggregating data
    for idx,mn in enumerate(m_over_n_values):

        counter = str(idx).zfill(3)
        print("Counter is: "+counter)
        # first get the chi values for this m_over_n
        #print ("mn is: " + str(mn))
        #print("index is: "+str(idx))
        mn_legend = "m_over_n = "+str(mn)
        #print("I am looking for the data element: "+mn_legend)
        this_chi = thisPointData.QueryData(mn_legend)

        # get the MLE value for this m/n
        this_MLE = allBasinStatsData.QueryData(mn_legend)

        # convert to a numpy array for masking
        Chi = np.asarray(this_chi)

        # some info about the chi and elevation values
        #max_chi = np.amax(Chi)
        #max_Elevation = np.amax(Elevation)
        #min_Elevation = np.amin(Elevation)

        #z_axis_min = int(min_Elevation/10)*10
        #z_axis_max = int(max_Elevation/10)*10+10
        #chi_axis_max = int(max_chi/5)*5+5

        # Now mask the data. Initially we will do only basin 0
        if basin_list == []:
            print("You didn't give me any basins so I assume you want all of them.")
            basin_list = range(0,n_basins-1)

        for basin_key in basin_list:
            # now we need to find out if this basin is in the allstats file,
            # and if so what index it is
            #this_basin_index = -99
            #for ii,bk in enumerate(allstats_basinkeys):
            #    #print("index: "+str(ii)+" and basin_key is: "+str(bk))
            #    if (bk == basin_key):
            #        this_basin_index = idx

            #if(this_basin_index != -99):
            #    MLE = this_MLE[this_basin_index]
            #else:
            #    MLE = "NaN"
            MLE = this_MLE[basin_key]
            #MLE_str = str(MLE)
            #short_MLE = str("%03.02e" % round(MLE,2))
            short_MLE = str(round(MLE,3))
            print("The short MLE is: "+short_MLE)

            #print("The MLE of this basin for this m over n is: "+short_MLE)

            # this gets the mask (for the chosen basin)
            m = np.ma.masked_where(Basin!=basin_key, Basin)

            # this is the masked chi value
            maskX = np.ma.masked_where(np.ma.getmask(m), Chi)
            maskElevation = np.ma.masked_where(np.ma.getmask(m), Elevation)

            # now plot the data with a colourmap
            ax.scatter(maskX,maskElevation,s=2.5, c=maskElevation,cmap="terrain",edgecolors='none')

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the lables
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # This affects all axes because we set share_all = True.
            #ax.set_ylim(z_axis_min,z_axis_max)
            #ax.set_ylim(z_axis_min,z_axis_max)
            #ax.set_xlim(0,chi_axis_max)
            #ax.text("Basin = " +mn_legend+", $m/n$ = "+str(mn))

            #newline = "\n"
            title_string = "Basin "+str(basin_key)+", $m/n$ = "+str(mn)
            title_string2 = "MLE = "+short_MLE
            ax.text(0.05, 0.95, title_string,
                    verticalalignment='top', horizontalalignment='left',
                    transform=ax.transAxes,
                    color='black', fontsize=10)
            # print("The basin index is: "+str(basin_key)+" and the max index is: "+str(max_MLEs_index[basin_key]))
            # if( idx == max_MLEs_index[basin_key]):
            #     print("This m/n is: "+str(mn)+" and it is the maximum MLE")
            #     ax.text(0.05, 0.88, title_string2+", maximum MLE in basin.",
            #         verticalalignment='top', horizontalalignment='left',
            #         transform=ax.transAxes,
            #         color='red', fontsize=10)
            # else:
            #     ax.text(0.05, 0.88, title_string2,
            #         verticalalignment='top', horizontalalignment='left',
            #         transform=ax.transAxes,
            #         color='black', fontsize=10)

            #save the plot
            newFilename = MLE_directory+"Chi_profiles_basin_"+str(basin_key)+"_"+counter+".png"

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            FigFormat = "png"
            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()

def MakeChiPlotsMLE(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7,
                    size_format='ESURF', FigFormat='png', animate=False, keep_pngs=False, parallel=False):
    """
    This function makes chi-elevation plots for each basin and each value of m/n
    where the channels are coloured by the MLE value compared to the main stem.
    The main stem is plotted in black.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        animate (bool): If this is true then it creates a movie of the chi-elevation plots coloured by MLE.
        keep_pngs (bool): If this is false and the animation flag is true, then the pngs are deleted and just the video is kept.

    Returns:
        Plot of each m/n value for each basin.

    Author: FJC
    """
    from matplotlib.ticker import FormatStrFormatter

    # check if a directory exists for the chi plots. If not then make it.
    MLE_directory = DataDirectory+'chi_plots/'
    if not os.path.isdir(MLE_directory):
        os.makedirs(MLE_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=1.0,top=1.0)
    ax = fig.add_subplot(gs[10:95,5:80])
    #colorbar axis
    ax2 = fig.add_subplot(gs[10:95,82:85])

    # read in the csv files
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory, fname_prefix)
        BasinStatsDF = Helper.ReadBasinStatsCSV(DataDirectory, fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory, fname_prefix)
        BasinStatsDF = Helper.AppendBasinCSVs(DataDirectory, fname_prefix)

    # get the number of basins
    basin_keys = list(BasinStatsDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys

    # loop through each m over n value
    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # best fit moverns
    best_fit_moverns = SimpleMaxMLECheck(BasinStatsDF)

    for m_over_n in m_over_n_values:
        # read in the full stats file

        #Stupid floating point representation issues
        movern_str = "%.2f" % round(m_over_n,2)
        if movern_str.endswith('0'):
            movern_str = movern_str[:-1]

        print("This m/n is: "+movern_str)
        if not parallel:
            FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,movern_str)
        else:
            FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,movern_str,fname_prefix)

        # loop through all the basins in the basin list
        for basin_key in basin_list:
            print("This basin key is: "+str(basin_key))

            # mask the data frames for this basin
            ProfileDF_basin = ProfileDF[ProfileDF['basin_key'] == basin_key]
            FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin_key]

            #print FullStatsDF_basin
            print ("Getting the reference_source_key")

            print(FullStatsDF_basin.iloc[0]['reference_source_key'])

            # get the data frame for the main stem
            ProfileDF_MS = ProfileDF_basin[ProfileDF_basin['source_key'] == FullStatsDF_basin.iloc[0]['reference_source_key']]

            # get the data frame for the tributaries
            ProfileDF_basin = ProfileDF_basin[ProfileDF_basin['source_key'] != FullStatsDF_basin.iloc[0]['reference_source_key']]
            # merge with the full data to get the MLE for the tributaries
            ProfileDF_tribs = ProfileDF_basin.merge(FullStatsDF_basin, left_on = "source_key", right_on = "test_source_key")

            # get the chi and elevation data for the main stem
            movern_key = 'm_over_n = %s' % movern_str
            MainStemX = list(ProfileDF_MS[movern_key])
            MainStemElevation = list(ProfileDF_MS['elevation'])

            # get the chi, elevation, and MLE for the tributaries
            TributariesX = list(ProfileDF_tribs[movern_key])
            TributariesElevation = list(ProfileDF_tribs['elevation'])
            TributariesMLE = list(ProfileDF_tribs['MLE'])

            # get the colourmap to colour channels by the MLE value
            #NUM_COLORS = len(MLE)
            MLE_array = np.asarray(TributariesMLE)
            this_cmap = plt.cm.coolwarm
            cNorm  = colors.Normalize(vmin=np.min(MLE_array), vmax=np.max(MLE_array))
            plt.cm.ScalarMappable(norm=cNorm, cmap=this_cmap)

            # now plot the data with a colourmap
            sc = ax.scatter(TributariesX,TributariesElevation,c=TributariesMLE,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')
            ax.plot(MainStemX,MainStemElevation,lw=2, c='k')

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the lables
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # the best fit m/n
            best_fit_movern = best_fit_moverns[basin_key]

            # label with the basin and m/n
            title_string = "Basin "+str(basin_key)+", "+ r"$\theta$ = "+movern_str
            if best_fit_movern == m_over_n:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='red', fontsize=10)
            else:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='black', fontsize=10)

            # add the colorbar
            colorbarlabel = "$MLE$"
            cbar = plt.colorbar(sc,cmap=this_cmap,spacing='uniform', orientation='vertical',cax=ax2)
            cbar.set_label(colorbarlabel, fontsize=10)
            ax2.set_ylabel(colorbarlabel, fontname='Liberation Sans', fontsize=10)
            ax2.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

            #save the plot
            newFilename = MLE_directory+"MLE_profiles"+str(basin_key)+"_"+movern_str+"."+str(FigFormat)

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()
            ax2.cla()

    if animate:
        # animate the pngs using ffmpeg
        system_call = "ffmpeg -framerate 3 -pattern_type glob -i '"+MLE_directory+"MLE_profiles*.png' -y -vcodec libx264 -s 1230x566 -pix_fmt yuv420p "+MLE_directory+"MLE_profiles.mp4"
        print (system_call)
        subprocess.call(system_call, shell=True)
        # delete the pngs if you want
        if not keep_pngs:
            system_call = "rm "+MLE_directory+"MLE_profiles*.png"
            subprocess.call(system_call, shell=True)
    plt.close(fig)

    
    
def MakeChiPlotsChi(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7,
                    size_format='ESURF', FigFormat='png', animate=False, keep_pngs=False, parallel=False):
    """
    This function makes chi-elevation plots for each basin and each value of m/n
    where the channels are coloured by the chi value compared to the main stem.
    The main stem is plotted in black.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        animate (bool): If this is true then it creates a movie of the chi-elevation plots coloured by MLE.
        keep_pngs (bool): If this is false and the animation flag is true, then the pngs are deleted and just the video is kept.

    Returns:
        Plot of each m/n value for each basin.

    Author: SMM, from FJC code
    """
    from matplotlib.ticker import FormatStrFormatter

    # check if a directory exists for the chi plots. If not then make it.
    MLE_directory = DataDirectory+'chi_plots/'
    if not os.path.isdir(MLE_directory):
        os.makedirs(MLE_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=1.0,top=1.0)
    ax = fig.add_subplot(gs[10:95,5:90])
    
    #colorbar axis
    #ax2 = fig.add_subplot(gs[10:95,82:85])

    # read in the csv files
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory, fname_prefix)
        DisorderDF = Helper.ReadDisorderUncertCSV(DataDirectory, fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory, fname_prefix)
        DisorderDF = Helper.AppendDisorderCSV(DataDirectory, fname_prefix)
    best_fit_moverns = DisorderDF['median'].tolist()
    
    # get the number of basins
    basin_keys = list(DisorderDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]
    
    MOverNDict = dict(zip(basin_keys,best_fit_moverns))

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys

    # loop through each m over n value
    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    for m_over_n in m_over_n_values:
        # read in the full stats file

        #Stupid floating point representation issues
        movern_str = "%.2f" % round(m_over_n,2)
        if movern_str.endswith('0'):
            movern_str = movern_str[:-1]

        print("This concavity is: "+movern_str)

        # loop through all the basins in the basin list
        for basin_key in basin_list:
            print("This basin key is: "+str(basin_key))
            #print("The best fit concavity is:")
            #print(MOverNDict[basin_key])
            
            # Format the best fit concavity string
            bf_movernstr = "%.2f" % round(MOverNDict[basin_key],2)
            if bf_movernstr.endswith('0'):
                bf_movernstr = bf_movernstr[:-1]
                
            #print("This concavity is:")
            #print(movern_str)
            #print("Best fit concavity in string format is:")
            #print(bf_movernstr)
            
            this_is_bf_concavity = False
            if (movern_str == bf_movernstr):
                #print("==================================")
                #print("This is the best fitting concavity")
                #print("==================================")
                this_is_bf_concavity = True
                
            # mask the data frames for this basin
            ProfileDF_basin = ProfileDF[ProfileDF['basin_key'] == basin_key]

            # get the chi and elevation data for the main stem
            movern_key = 'm_over_n = %s' % movern_str
            X = list(ProfileDF_basin[movern_key])
            Elevation = list(ProfileDF_basin['elevation'])

            # get the colourmap to colour channels by the MLE value
            #NUM_COLORS = len(MLE)
            MLE_array = np.asarray(X)
            this_cmap = plt.cm.coolwarm
            cNorm  = colors.Normalize(vmin=np.min(X), vmax=np.max(X))
            plt.cm.ScalarMappable(norm=cNorm, cmap=this_cmap)

            # now plot the data with a colourmap
            if this_is_bf_concavity:
                sc = ax.scatter(X,Elevation,c='r', s=2.5, edgecolors='none')
                #sc = ax.scatter(X,Elevation,c=X,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')
            else:
                sc = ax.scatter(X,Elevation,c='k', s=2.5, edgecolors='none')
                #sc = ax.scatter(X,Elevation,c=X,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')
                
            

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the lables
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # the best fit m/n
            best_fit_movern = best_fit_moverns[basin_key]
            
            #print("The best fit concavity is: "+str(best_fit_movern))

            # label with the basin and m/n
            title_string = "Basin "+str(basin_key)+", "+ r"$\theta$ = "+movern_str
            if this_is_bf_concavity:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='red', fontsize=10)
                ax.text(0.05, 0.85, "Best fit concavity",
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='red', fontsize=10)
            else:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='black', fontsize=10)

            # add the colorbar
            #colorbarlabel = "$\chi$ (m)"
            #cbar = plt.colorbar(sc,cmap=this_cmap,spacing='uniform', orientation='vertical',cax=ax2)
            #cbar.set_label(colorbarlabel, fontsize=10)
            #ax2.set_ylabel(colorbarlabel, fontname='Liberation Sans', fontsize=10)
            #ax2.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

            #save the plot
            newFilename = MLE_directory+"chi_profiles"+str(basin_key)+"_"+movern_str+"."+str(FigFormat)

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()
            #ax2.cla()

    if animate:
        # animate the pngs using ffmpeg
        system_call = "ffmpeg -framerate 3 -pattern_type glob -i '"+MLE_directory+"chi_profiles*.png' -y -vcodec libx264 -s 1230x566 -pix_fmt yuv420p "+MLE_directory+"chi_profiles.mp4"
        print (system_call)
        subprocess.call(system_call, shell=True)
        # delete the pngs if you want
        if not keep_pngs:
            system_call = "rm "+MLE_directory+"chi_profiles*.png"
            subprocess.call(system_call, shell=True)
    plt.close(fig)    
    
    
def MakeChiPlotsColouredByK(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7,size_format='ESURF', FigFormat='png', animate=False, keep_pngs=False, parallel=False):
    """
    This function makes chi-elevation plots for each basin and each value of m/n
    where the channels are coloured by the K value (for model runs with spatially varying K).

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        animate (bool): If this is true then it creates a movie of the chi-elevation plots coloured by MLE.
        keep_pngs (bool): If this is false and the animation flag is true, then the pngs are deleted and just the video is kept.

    Returns:
        Plot of each m/n value for each basin.

    Author: FJC
    """
    from LSDPlottingTools import colours

    # check if a directory exists for the chi plots. If not then make it.
    K_directory = DataDirectory+'chi_plots_K/'
    if not os.path.isdir(K_directory):
        os.makedirs(K_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)


    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.95,top=1.0)
    ax = fig.add_subplot(gs[10:95,5:80])
    #colorbar axis
    ax2 = fig.add_subplot(gs[10:95,82:85])

    # read in the csv files
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory, fname_prefix)
        BasinStatsDF = Helper.ReadBasinStatsCSV(DataDirectory, fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory,fname_prefix)
        BasinStatsDF = Helper.AppendBasinCSVs(DataDirectory,fname_prefix)

    # get the number of basins
    basin_keys = list(BasinStatsDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys

    # loop through each m over n value
    end_movern = start_movern+d_movern*(n_movern-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # best fit moverns
    best_fit_moverns = SimpleMaxMLECheck(BasinStatsDF)

    for m_over_n in m_over_n_values:
        # read in the full stats file
        print("This m/n is: "+str(m_over_n))
        if not parallel:
            FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,m_over_n)
        else:
            FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,m_over_n,fname_prefix)

        # loop through all the basins in the basin list
        for basin_key in basin_list:
            print("This basin key is %s") %str(basin_key)

            # mask the data frames for this basin
            ProfileDF_basin = ProfileDF[ProfileDF['basin_key'] == basin_key]
            FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin_key]

            #print FullStatsDF_basin
            print ("Getting the reference_source_key")

            print(FullStatsDF_basin.iloc[0]['reference_source_key'])

            # get the data frame for the main stem
            ProfileDF_MS = ProfileDF_basin[ProfileDF_basin['source_key'] == FullStatsDF_basin.iloc[0]['reference_source_key']]

            # get the data frame for the tributaries
            ProfileDF_basin = ProfileDF_basin[ProfileDF_basin['source_key'] != FullStatsDF_basin.iloc[0]['reference_source_key']]
            # merge with the full data to get the MLE for the tributaries
            ProfileDF_tribs = ProfileDF_basin.merge(FullStatsDF_basin, left_on = "source_key", right_on = "test_source_key")

            # get the chi and elevation data for the main stem
            movern_key = 'm_over_n = %s' %(str(m_over_n))
            MainStemX = list(ProfileDF_MS[movern_key])
            MainStemElevation = list(ProfileDF_MS['elevation'])
            MainStemK = list(ProfileDF_MS['K_value'])

            # get the chi, elevation, and MLE for the tributaries
            TributariesX = list(ProfileDF_tribs[movern_key])
            TributariesElevation = list(ProfileDF_tribs['elevation'])
            TributariesK = list(ProfileDF_tribs['K_value'])

            # get the colourmap to colour channels by the MLE value
            #NUM_COLORS = len(MLE)
            K_array = np.asarray(TributariesK)
            min_K = np.min(K_array)
            max_K = np.max(K_array)
            this_cmap = plt.cm.Spectral
            n_colours = 10
            this_cmap = colours.cmap_discretize(n_colours, this_cmap)
            cNorm  = colors.Normalize(vmin=min_K, vmax=max_K)
            plt.cm.ScalarMappable(norm=cNorm, cmap=this_cmap)

            # now plot the data with a colourmap
            sc = ax.scatter(TributariesX,TributariesElevation,c=TributariesK,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')
            sc = ax.scatter(MainStemX, MainStemElevation,c=MainStemK,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the labels
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # the best fit m/n
            best_fit_movern = best_fit_moverns[basin_key]
            print ("BEST FIT M/N IS: "+ str(best_fit_movern))
            print ("THIS M/N IS: "+str(m_over_n))

            # label with the basin and m/n
            title_string = "Basin "+str(basin_key)+", $m/n$ = "+str(m_over_n)
            if best_fit_movern == m_over_n:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='red', fontsize=10)
            else:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='black', fontsize=10)

            # add the colorbar
            colorbarlabel = "$K$"
            cbar = plt.colorbar(sc,cmap=this_cmap,spacing='uniform', orientation='vertical',cax=ax2)
            cbar.set_label(colorbarlabel, fontsize=10)
            ax2.set_ylabel(colorbarlabel, fontname='Liberation Sans', fontsize=10)

            #change labels to scientific notation
            colours.fix_colourbar_ticks(cbar,n_colours, cbar_type=float, min_value = min_K, max_value = max_K, cbar_label_rotation=0, cbar_orientation='vertical')
            # we need to get linear values between min and max K
            these_labels = np.linspace(min_K,max_K,n_colours)
            # now round these and convert to scientific notation
            these_labels = [str('{:.2e}'.format(float(x))) for x in these_labels]
            new_labels = []
            for label in these_labels:
                a,b = label.split("e")
                b = b.replace("0", "")
                new_labels.append(a+' x 10$^{%s}$' % b)

            ax2.set_yticklabels(new_labels, fontsize=8)

            #save the plot
            newFilename = K_directory+"Chi_profiles_by_K_"+str(basin_key)+"_"+str(m_over_n)+"."+str(FigFormat)

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()
            ax2.cla()

    if animate:
        # animate the pngs using ffmpeg
        system_call = "ffmpeg -framerate 3 -pattern_type glob -i '"+K_directory+"Chi_profiles_by_K*.png' -y -vcodec libx264 -s 1230x566 -pix_fmt yuv420p "+K_directory+"Chi_profiles_by_K.mp4"
        print (system_call)
        subprocess.call(system_call, shell=True)
        # delete the pngs if you want
        if not keep_pngs:
            system_call = "rm "+K_directory+"Chi_profiles_by_K*.png"
            subprocess.call(system_call, shell=True)
    plt.close(fig)

def MakeChiPlotsColouredByLith(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7,
                    size_format='ESURF', FigFormat='png', animate=False, keep_pngs=False, parallel=False):
    """
    This function makes chi-elevation plots for each basin and each value of m/n
    where the channels are coloured by the K value (for model runs with spatially varying K).

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        animate (bool): If this is true then it creates a movie of the chi-elevation plots coloured by MLE.
        keep_pngs (bool): If this is false and the animation flag is true, then the pngs are deleted and just the video is kept.

    Returns:
        Plot of each m/n value for each basin.

    Author: FJC
    """
    from LSDPlottingTools import colours
    print("WARNING DEPRECATED FUNCTION, USE THE MakeChiPlotsByLith FROM LSDMapLithoPlotting")

    # check if a directory exists for the chi plots. If not then make it.
    K_directory = DataDirectory+'chi_plots_Lith/'
    if not os.path.isdir(K_directory):
        os.makedirs(K_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)


    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.95,top=1.0)
    ax = fig.add_subplot(gs[10:95,5:80])
    #colorbar axis
    ax2 = fig.add_subplot(gs[10:95,82:85])

    # read in the csv files
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory, fname_prefix)
        BasinStatsDF = Helper.ReadBasinStatsCSV(DataDirectory, fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory,fname_prefix)
        BasinStatsDF = Helper.AppendBasinCSVs(DataDirectory,fname_prefix)

    # get the number of basins
    basin_keys = list(BasinStatsDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys

    # loop through each m over n value
    end_movern = start_movern+d_movern*(n_movern-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # best fit moverns
    best_fit_moverns = SimpleMaxMLECheck(BasinStatsDF)

    for m_over_n in m_over_n_values:
        # read in the full stats file
        print("This m/n is: "+str(m_over_n))
        if not parallel:
            FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,m_over_n)
        else:
            FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,m_over_n,fname_prefix)

        # loop through all the basins in the basin list
        for basin_key in basin_list:
            print("This basin key is %s") %str(basin_key)

            # mask the data frames for this basin
            ProfileDF_basin = ProfileDF[ProfileDF['basin_key'] == basin_key]
            FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin_key]

            #print FullStatsDF_basin
            print ("Getting the reference_source_key")

            print(FullStatsDF_basin.iloc[0]['reference_source_key'])

            # get the data frame for the main stem
            ProfileDF_MS = ProfileDF_basin[ProfileDF_basin['source_key'] == FullStatsDF_basin.iloc[0]['reference_source_key']]

            # get the data frame for the tributaries
            ProfileDF_basin = ProfileDF_basin[ProfileDF_basin['source_key'] != FullStatsDF_basin.iloc[0]['reference_source_key']]
            # merge with the full data to get the MLE for the tributaries
            ProfileDF_tribs = ProfileDF_basin.merge(FullStatsDF_basin, left_on = "source_key", right_on = "test_source_key")

            # get the chi and elevation data for the main stem
            movern_key = 'm_over_n = %s' %(str(m_over_n))
            MainStemX = list(ProfileDF_MS[movern_key])
            MainStemElevation = list(ProfileDF_MS['elevation'])
            MainStemK = list(ProfileDF_MS[fname_prefix+"_geol"])


            # get the chi, elevation, and MLE for the tributaries
            TributariesX = list(ProfileDF_tribs[movern_key])
            TributariesElevation = list(ProfileDF_tribs['elevation'])
            TributariesK = list(ProfileDF_tribs[fname_prefix+"_geol"])


            # get the colourmap to colour channels by the MLE value
            #NUM_COLORS = len(MLE)
            K_array = np.asarray(TributariesK)
            min_K = np.min(K_array)
            max_K = np.max(K_array)
            this_cmap = plt.cm.Spectral
            n_colours = 10
            this_cmap = colours.cmap_discretize(n_colours, this_cmap)
            cNorm  = colors.Normalize(vmin=min_K, vmax=max_K)
            plt.cm.ScalarMappable(norm=cNorm, cmap=this_cmap)

            # now plot the data with a colourmap
            sc = ax.scatter(TributariesX,TributariesElevation,c=TributariesK,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')
            sc = ax.scatter(MainStemX, MainStemElevation,c=MainStemK,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the labels
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # the best fit m/n
            best_fit_movern = best_fit_moverns[basin_key]
            print ("BEST FIT M/N IS: "+ str(best_fit_movern))
            print ("THIS M/N IS: "+str(m_over_n))

            # label with the basin and m/n
            title_string = "Basin "+str(basin_key)+", $m/n$ = "+str(m_over_n)
            if best_fit_movern == m_over_n:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='red', fontsize=10)
            else:
                ax.text(0.05, 0.95, title_string,
                        verticalalignment='top', horizontalalignment='left',
                        transform=ax.transAxes,
                        color='black', fontsize=10)

            # add the colorbar
            colorbarlabel = "$Lith$"
            cbar = plt.colorbar(sc,cmap=this_cmap,spacing='uniform', orientation='vertical',cax=ax2)
            cbar.set_label(colorbarlabel, fontsize=10)
            ax2.set_ylabel(colorbarlabel, fontname='Liberation Sans', fontsize=10)

            #change labels to scientific notation
            colours.fix_colourbar_ticks(cbar,n_colours, cbar_type=float, min_value = min_K, max_value = max_K, cbar_label_rotation=0, cbar_orientation='vertical')
            # we need to get linear values between min and max K
            these_labels = np.linspace(min_K,max_K,n_colours)
            # now round these and convert to scientific notation
            these_labels = [str('{:.2e}'.format(float(x))) for x in these_labels]
            new_labels = []
            for label in these_labels:
                a,b = label.split("e")
                b = b.replace("0", "")
                new_labels.append(a+' x 10$^{%s}$' % b)

            ax2.set_yticklabels(new_labels, fontsize=8)

            #save the plot
            newFilename = K_directory+"Chi_profiles_by_Lith_"+str(basin_key)+"_"+str(m_over_n)+"."+str(FigFormat)

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()
            ax2.cla()

    if animate:
        # animate the pngs using ffmpeg
        system_call = "ffmpeg -framerate 3 -pattern_type glob -i '"+K_directory+"Chi_profiles_by_Lith*.png' -y -vcodec libx264 -s 1230x566 -pix_fmt yuv420p "+K_directory+"Chi_profiles_by_Lith.mp4"
        print (system_call)
        subprocess.call(system_call, shell=True)
        # delete the pngs if you want
        if not keep_pngs:
            system_call = "rm "+K_directory+"Chi_profiles_by_Lith*.png"
            subprocess.call(system_call, shell=True)
    plt.close(fig)


def PlotProfilesRemovingOutliers(DataDirectory, fname_prefix, basin_list=[0], start_movern=0.2, d_movern=0.1, n_movern=7, size_format = "geomorphology", FigFormat="png", parallel=False):
    """
    This function is used to plot the chi profiles as they have outliers removed.
    It calls thefunction CheckMLEOutliers, which you should read to get details
    on how outliers are calculated and removed

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Returns:
        Plots of chi profiles with basins removed

    Author: SMM
    """

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=1.0,top=1.0)
    ax = fig.add_subplot(gs[10:95,5:80])
    #colorbar axis
    ax2 = fig.add_subplot(gs[10:95,82:85])

    # we open the first file just so that we can get a counter list
    if not parallel:
        FirstDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,start_movern)
    else:
        FirstDF = Helper.AppendFullStatsCSVs(DataDirectory,start_movern,fname_prefix)

    # get the number of basins
    basin_keys = list(FirstDF['basin_key'])
    basin_keys = [float(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys
        basin_set = set(basin_list)
        basin_list = list(basin_set)
        basin_list = [int(i) for i in basin_list]

    # First we get all the information about outliers, m/n values and MLE
    # values from the CheckMLEOutliers function
    Outlier_counter, removed_sources_dict, best_fit_movern_dict, MLEs_dict = CheckMLEOutliers(DataDirectory, fname_prefix, basin_list, start_movern, d_movern, n_movern, parallel=parallel)

    # Now get the chi profiles of all the basins and channels
    # Load from file and put into a pandas data frame
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory,fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory,fname_prefix)

    # now we need to get a plot for each basin, showing the incremental removal of outlying tribs
    for basin_number in basin_list:

        # Get the removed sources indices and the MLEs for this particular basin
        these_removed_sources = removed_sources_dict[basin_number]
        these_MLEs = MLEs_dict[basin_number]

        # loop through the best fit moverns
        # each value represents the MLE for a given number of removed outlying tributaries
        removed_sources_list = []
        for idx,best_fit_movern in enumerate(best_fit_movern_dict[basin_number]):

            print("The best fit m/n is: "+ str(best_fit_movern)+" and the index is "+str(idx))

            # mask the data frames for this basin
            ProfileDF_basin = ProfileDF[ProfileDF['basin_key'] == basin_number]

            # We also need to get the fullstats MLE file. This will be used
            # to colour tribs as well as get the source numbers from
            # the outlier list
            if not parallel:
                FullStatsDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,best_fit_movern)
            else:
                FullStatsDF = Helper.AppendFullStatsCSVs(DataDirectory,best_fit_movern,fname_prefix)

            # mask the data so you only get the correct basin
            FullStatsDF_basin = FullStatsDF[FullStatsDF['basin_key'] == basin_number]

            # extract the relevant data
            #MLE_values = list(FullStatsDF_basin['MLE'])
            #RMSE_values = list(FullStatsDF_basin['RMSE'])
            trib_values = list(FullStatsDF_basin['test_source_key'])
            ref_values = list(FullStatsDF_basin['reference_source_key'])

            # Now get the excluded tributaries for this iteration
            #removed_MLE = these_MLEs[idx]

            # Note that index 0 is the basin with no removed tributaries.
            if idx != 0:
                removed_sources_list.extend(these_removed_sources[idx-1])

            # now you need to get the actual source numbers by indexing into the source list
            the_removed_sources = []
            for source_index in removed_sources_list:
                the_removed_sources.append( trib_values[source_index]  )

            print("The main stem is: ")
            print( ref_values[0])

            print("The removed tribs are: ")
            print(the_removed_sources)



            # get the data frame for the main stem
            # It does this because the main stem source is always the 0 element in the trib_values list
            ProfileDF_MS = ProfileDF_basin[ProfileDF_basin['source_key'] == ref_values[0]]

            # get the data frame for the tributaries
            ProfileDF_basin = ProfileDF_basin[ProfileDF_basin['source_key'] != ref_values[0]]

            # now split the tributaries into exluded and non excluded tribs
            #ProfileDF_outliers = ProfileDF_basin.filter(items=removed_sources_list)
            #ProfileDF_kept = ProfileDF_basin.filter(items=removed_sources_list)
            ProfileDF_outliers = ProfileDF_basin[ProfileDF_basin.source_key.isin(the_removed_sources)]
            ProfileDF_kept = ProfileDF_basin[~ProfileDF_basin.source_key.isin(the_removed_sources)]

            #ProfileDF_basin = ProfileDF_basin[ProfileDF_basin.source_key.isin(removed_sources_list)]
            #ProfileDF_basin = ProfileDF_basin[ProfileDF_basin.source_key.isin([2,3,4])]
            #print(ProfileDF_basin)

            # merge with the full data to get the MLE for the tributaries
            ProfileDF_trib_outliers = ProfileDF_outliers.merge(FullStatsDF_basin, left_on = "source_key", right_on = "test_source_key")
            ProfileDF_trib_kept = ProfileDF_kept.merge(FullStatsDF_basin, left_on = "source_key", right_on = "test_source_key")

            # get the chi and elevation data for the main stem
            movern_key = 'm_over_n = %s' %(str(best_fit_movern))
            MainStemX = list(ProfileDF_MS[movern_key])
            MainStemElevation = list(ProfileDF_MS['elevation'])

            # get the chi, elevation, and MLE for the tributaries
            TributariesX_outliers = list(ProfileDF_trib_outliers[movern_key])
            TributariesElevation_outliers = list(ProfileDF_trib_outliers['elevation'])
            #TributariesMLE_outliers = list(ProfileDF_trib_outliers['MLE'])

            TributariesX_kept = list(ProfileDF_trib_kept[movern_key])
            TributariesElevation_kept = list(ProfileDF_trib_kept['elevation'])
            TributariesMLE_kept = list(ProfileDF_trib_kept['MLE'])

            # now reset the

            # get the colourmap to colour channels by the MLE value
            #NUM_COLORS = len(MLE)
            MLE_array = np.asarray(TributariesMLE_kept)
            this_cmap = plt.cm.Reds
            cNorm  = colors.Normalize(vmin=np.min(MLE_array), vmax=np.max(MLE_array))
            plt.cm.ScalarMappable(norm=cNorm, cmap=this_cmap)

            # now plot the data with a colourmap
            sc = ax.scatter(TributariesX_kept,TributariesElevation_kept,c=TributariesMLE_kept,cmap=this_cmap, norm=cNorm, s=2.5, edgecolors='none')

            # Add the outliers if the basin has them
            if(len(removed_sources_list)>0):
                ax.scatter(TributariesX_outliers,TributariesElevation_outliers,c="b", norm=cNorm, s=2.5, edgecolors='none', alpha = 0.3)

            ax.plot(MainStemX,MainStemElevation,lw=2, c='k')

            # some formatting of the figure
            ax.spines['top'].set_linewidth(1)
            ax.spines['left'].set_linewidth(1)
            ax.spines['right'].set_linewidth(1)
            ax.spines['bottom'].set_linewidth(1)

            # make the lables
            ax.set_xlabel("$\chi$ (m)")
            ax.set_ylabel("Elevation (m)")

            # label with the basin and m/n
            title_string = "Basin "+str(basin_number)+", best fit "+r'$\theta$' + " = str(best_fit_movern)"
            ax.text(0.05, 0.95, title_string,
                    verticalalignment='top', horizontalalignment='left',
                    transform=ax.transAxes,
                    color='black', fontsize=10)

            # add the colorbar
            colorbarlabel = "$MLE$"
            cbar = plt.colorbar(sc,cmap=this_cmap,spacing='uniform', orientation='vertical',cax=ax2)
            cbar.set_label(colorbarlabel, fontsize=10)
            ax2.set_ylabel(colorbarlabel, fontname='Liberation Sans', fontsize=10)

            #save the plot
            newFilename = DataDirectory+"MLE_profiles"+str(basin_number)+"_"+str(best_fit_movern)+"_removed_"+str(idx)+".png"

            # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
            ax.tick_params(axis='both', width=1, pad = 2)
            for tick in ax.xaxis.get_major_ticks():
                tick.set_pad(2)

            plt.savefig(newFilename,format=FigFormat,dpi=300)
            ax.cla()
            ax2.cla()

def PlotMLEWithMOverN(DataDirectory, fname_prefix, basin_list = [0], size_format='ESURF', FigFormat='png', start_movern=0.2, d_movern = 0.1, n_movern = 7, parallel=False):
    """
    This function makes a plot of the MLE values for each m/n showing how the MLE values change
    as you remove the tributaries.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to make the plots for. If an empty list is passed then
        all the basins will be analysed. Default = basin 0.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.

    Returns:
        Plots of MLE values for each m/n

    Author: FJC
    """
    from cycler import cycler
    from matplotlib import lines
    import matplotlib.patches as patches

    # check if a directory exists for the MLE plots. If not then make it.
    MLE_directory = DataDirectory+'MLE_plots/'
    if not os.path.isdir(MLE_directory):
        os.makedirs(MLE_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # set colours for tributaries
    # plt.rc('axes', prop_cycle=(cycler('color', ['k', '0.5', '0.5', '0.5', '0.5']) +
    #                        cycler('linestyle', ['-', '--', ':', '-.', '--'])))

    # make a figure
    fig = makefigure(size_format)


    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
    ax = fig.add_subplot(gs[5:100,10:95])

    # we open the first file just so that we can get a counter list
    if not parallel:
        FirstDF = Helper.ReadFullStatsCSV(DataDirectory,fname_prefix,start_movern)
    else:
        FirstDF = Helper.AppendFullStatsCSVs(DataDirectory,start_movern,fname_prefix)

    # get the list of m over n values
    end_movern = start_movern+d_movern*(n_movern-1)
    m_over_n_values = np.linspace(start_movern,end_movern,n_movern)

    # get the number of basins
    basin_keys = list(FirstDF['basin_key'])
    basin_keys = [float(x) for x in basin_keys]

    # get the list of basins
    if basin_list == []:
        print("You didn't give me a list of basins, so I'll just run the analysis on all of them!")
        basin_list = basin_keys
        basin_set = set(basin_list)
        basin_list = list(basin_set)
        basin_list = [int(i) for i in basin_list]

    # First we get all the information about outliers, m/n values and MLE
    # values from the CheckMLEOutliers function
    Outlier_counter, removed_sources_dict, best_fit_movern_dict, MLEs_dict = CheckMLEOutliers(DataDirectory, fname_prefix, basin_list, start_movern, d_movern, n_movern, parallel=parallel)

    # Now get the chi profiles of all the basins and channels
    # Load from file and put into a pandas data frame
    if not parallel:
        ProfileDF = Helper.ReadChiProfileCSV(DataDirectory,fname_prefix)
    else:
        ProfileDF = Helper.AppendMovernCSV(DataDirectory,fname_prefix)

    # get list of line styles for plotting. This is hacky but not sure how else to do this.
    ls = lines.lineStyles.keys()
    ls = ls[3:]
    ls = ls + ls

    # loop through each basin and each number of removed tributaries
    for basin_number in basin_list:

        print ("This basin is: " +str(basin_number))

        # Get the removed sources indices and the MLEs for this particular basin
        basin_removed_sources = removed_sources_dict[basin_number]
        n_removed_sources = len(basin_removed_sources)
        basin_MLEs = MLEs_dict[basin_number]

        # get the best fit m over ns for this basin
        best_fit_moverns = best_fit_movern_dict[basin_number]
        print (best_fit_moverns)

        # colours for each iteration
        #colours = np.linspace(0.9,0.2,len(n_removed_sources)+1)


        # loop through the number of removed tributaires and get the MLE for each m/n for each iteration
        for i in range(n_removed_sources+1):

            # get the MLE of the best fit m over n
            best_fit_movern = best_fit_moverns[i]
            # get the index in the MLE list
            idx = int(round((best_fit_movern - start_movern)/d_movern,0))
            print (idx)
            best_fit_MLE = basin_MLEs[idx][i]
            print ("The best fit MLE is: "+str(best_fit_MLE)+", where "+ r'$\theta$'  ' = '+str(best_fit_movern))

            # get the MLEs for this iteration
            these_MLEs = basin_MLEs[:,i]

            # get the ratio of these MLEs to the best fit
            ratio_MLEs = [x/best_fit_MLE for x in these_MLEs]

            # no removed tributaries
            if i == 0:
                # plot the data
                ax.scatter(m_over_n_values,ratio_MLEs, label = str(i), c='k', s=5, zorder=100)
                ax.plot(m_over_n_values,ratio_MLEs, c='0.75', ls="--")

                # get the limits for the arrow
                max_MLE = max(ratio_MLEs)
                min_MLE = min(ratio_MLEs)
                dy = (max_MLE-min_MLE)/8
                spacing = 1.5
                # add arrow at best fit m/n
                ax.add_patch(
                    patches.Arrow(
                        best_fit_movern, #x
                        max_MLE-(dy*spacing), #y
                        0, #dx
                        dy, #dy
                        width = 0.05,
                        facecolor = 'r',
                        edgecolor = 'r')
                    )
                ax.text(best_fit_movern-0.09, max_MLE-1.5*(dy*spacing), "Best-fit " + r"$\theta$ = "+str(best_fit_movern),fontsize=8, color="r")
            #remove tribs
            #else:
                # plot the data
                #ax.scatter(m_over_n_values,ratio_MLEs, label = str(i), s=5, c=colours[i]) # different linestyle for each iteration?

        # set the axes labels
        ax.set_xlabel(r'$\theta$')
        ax.set_ylabel('MLE')

        # set the ylim
        ax.set_ylim(min_MLE,max_MLE+(dy*spacing))

        # add the legend
        #ax.legend(loc='right', bbox_to_anchor=(1.25,0.5), title = 'Iterations', frameon=False)

        # some formatting of the figure
        ax.spines['top'].set_linewidth(1)
        ax.spines['left'].set_linewidth(1)
        ax.spines['right'].set_linewidth(1)
        ax.spines['bottom'].set_linewidth(1)

        # label with the basin and m/n
        best_fit_movern = best_fit_movern_dict[basin_number][0]
        #title_string = "Basin "+str(basin_number)+"; Best fit $m/n$: "+str(best_fit_movern)
        #ax.set_title(title_string)
        # ax.text(0, 1.1, title_string,
        #     verticalalignment='top', horizontalalignment='left',
        #     transform=ax.transAxes,
        #     color='red', fontsize=10)

        #save the plot
        newFilename = MLE_directory+"MLE_fxn_movern_"+str(basin_number)+"."+FigFormat

        # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
        ax.tick_params(axis='both', width=1, pad = 2)
        for tick in ax.xaxis.get_major_ticks():
            tick.set_pad(2)
        ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.1))

        plt.savefig(newFilename,format=FigFormat,dpi=300)
        ax.cla()
    plt.close(fig)

def MakeMOverNSummaryPlot(DataDirectory, fname_prefix, basin_list=[], start_movern=0.2, d_movern=0.1, n_movern=7,
                          size_format='ESURF', FigFormat='png',
                          SA_channels=False, show_legend=True,parallel=False,Chi_disorder=False,
                          Chi_all=True,Chi_bootstrap=True, SA_raw=True, SA_segmented=True):
    """
    This function makes a summary plot of the best fit m/n from the different
    methods.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: list of basin keys to analyse, default = [] (all basins)
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        SA_channels (bool): If true, will include the SA data separated by channel
        show_legend (bool): If true, display the legend with the plot
        Chi_disorder (bool): if true, will include the data from the chi disorder method

    Returns:
        Makes a summary plot

    Author: FJC, edited SMM 25/05/2018
    """
    # check if a directory exists for the summary plots. If not then make it.
    summary_directory = DataDirectory+'summary_plots/'
    if not os.path.isdir(summary_directory):
        os.makedirs(summary_directory)

    from matplotlib.ticker import FuncFormatter, MaxNLocator
    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)


    print("SHOW LEGEND", show_legend)

    if show_legend:
        gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.75,top=0.9)
    else:
        gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.95,top=0.95)

    ax = fig.add_subplot(gs[5:100,10:95])

    # read in the summary csv
    df = Helper.ReadMOverNSummaryCSV(summary_directory,fname_prefix)
    
    

    if basin_list != []:
        basin_keys = basin_list
    else:
        # get the basin keys
        basin_keys = df['basin_key'].tolist()
        print (basin_keys)

    df = df[df['basin_key'].isin(basin_keys)]

    # plot the full chi data
    full_chi_keys = df['basin_key'].values
    full_chi_keys = full_chi_keys.astype(float) - 0.2
    print("Full chi keys are: ")
    print(full_chi_keys)
    
    # Now check what is in the dataframe
    if 'Chi_MLE_full' in df:
        if Chi_all:
            ax.scatter(full_chi_keys, df['Chi_MLE_full'],marker='o', edgecolors='k', lw=0.5, facecolors='#e34a33', s=15, zorder=200, label='Chi all data')

    # Now see if there is points data
    if 'Chi_MLE_points' in df:
        # plot the points data
        median_movern = df['Chi_MLE_points'].values
        points_max_err = df['Chi_MLE_points_max'].values
        points_max_err = points_max_err.astype(float)-median_movern.astype(float)
        points_min_err = df['Chi_MLE_points_min'].values
        points_min_err = median_movern.astype(float)-points_min_err.astype(float)
        errors = np.array(list(zip(points_min_err, points_max_err))).T
    
        print("The errors or the point data are")
        print(errors)

        print(df['basin_key'].values)
        points_chi_keys = df['basin_key'].values
        points_chi_keys = points_chi_keys.astype(float) - 0.1

        if Chi_bootstrap:
            ax.errorbar(points_chi_keys, df['Chi_MLE_points'], s=15, marker='o', xerr=None, yerr=errors,
                    ecolor='#fdbb84', fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(points_chi_keys, df['Chi_MLE_points'], s=15, c='#fdbb84', marker='o', edgecolors='k',
                   lw=0.5,facecolors='#fdbb84', label='Chi bootstrap',zorder=200)

    # plot the chi disorder data if you want it
    if 'Chi_disorder' in df:
        if Chi_disorder:
            median_movern = df['Chi_disorder'].values
            points_max_err = df['Chi_disorder_max'].values
            points_max_err = points_max_err.astype(float)-median_movern.astype(float)
            points_min_err = df['Chi_disorder_min'].values
            points_min_err = median_movern.astype(float)-points_min_err.astype(float)
            errors = np.array(list(zip(points_min_err, points_max_err))).T

            disorder_chi_keys = df['basin_key'].values
            disorder_chi_keys = disorder_chi_keys.astype(float)-0.3
            ax.errorbar(disorder_chi_keys, df['Chi_disorder'], s=15, marker='o', xerr=None, yerr=errors, ecolor='#F06292', fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(disorder_chi_keys, df['Chi_disorder'],marker='o', edgecolors='k', lw=0.5, facecolors='#F06292', s=15, zorder=100, label='Chi disorder')


    # plot the SA data
    SA_keys = df['basin_key'].values
    SA_sterr = df['SA_raw_sterr'].values

    if SA_raw:
        ax.errorbar(SA_keys, df['SA_raw'], yerr=SA_sterr, c='#2b8cbe', elinewidth=1, fmt='none',label='_nolegend_')
        ax.scatter(SA_keys, df['SA_raw'], s=15, c='#2b8cbe', edgecolors='k',lw=0.5, label='S-A all data', zorder=100)

    if SA_channels:
        # plot the SA data by tribs
        median_movern = df['SA_tribs'].values
        points_max_err = df['SA_tribs_max'].values
        points_max_err = points_max_err.astype(float)-median_movern.astype(float)
        points_min_err = df['SA_tribs_min'].values
        points_min_err = median_movern.astype(float)-points_min_err.astype(float)
        errors = np.array(list(zip(points_min_err, points_max_err))).T

        SA_tribs_keys = df['basin_key'].values
        SA_tribs_keys = SA_tribs_keys.astype(float)+0.1
        ax.errorbar(SA_tribs_keys, df['SA_tribs'], s=15, marker='D', facecolors='white', xerr=None, yerr=errors, edgecolors='r', fmt='none', elinewidth=1, linestyle = ":", ecolor='r',label='_nolegend_')
        ax.scatter(SA_tribs_keys, df['SA_tribs'], s=15, marker='D', facecolors='white', edgecolors='r', label='S-A by channel',zorder=100)

    # plot the segmented SA data
    median_movern = df['SA_segments'].values
    points_max_err = df['SA_segments_max'].values
    points_max_err = points_max_err.astype(float)-median_movern.astype(float)
    points_min_err = df['SA_segments_min'].values
    points_min_err = median_movern.astype(float)-points_min_err.astype(float)
    errors = np.array(list(zip(points_min_err, points_max_err))).T

    SA_segment_keys = df['basin_key'].values
    SA_segment_keys = SA_segment_keys.astype(float)+0.2
    if SA_segmented:
        ax.errorbar(SA_segment_keys, df['SA_segments'], s=15, marker='o', facecolors='#a6bddb', xerr=None, yerr=errors,
                edgecolors='#a6bddb', fmt='none', elinewidth=1, linestyle = ":", ecolor='#a6bddb',label='_nolegend_')
        ax.scatter(SA_segment_keys, df['SA_segments'], s=15, marker='o', facecolors='#a6bddb', edgecolors='k',
               lw=0.5, label='Segmented S-A', zorder=100)

    # set the axis labels
    ax.set_xlabel('Basin key')
    ax.set_ylabel('Best fit '+r'$\theta$')

    if show_legend:
        print ("ADDING THE LEGEND")
        # sort both labels and handles by labels
        handles, labels = ax.get_legend_handles_labels()
        labels, handles = list(zip(*sorted(list(zip(labels, handles)), key=lambda t: t[0])))
        # add the legend
        ax.legend(handles, labels,fontsize=8, bbox_to_anchor=(1.0,0.7),bbox_transform=plt.gcf().transFigure)

    # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
    ax.tick_params(axis='both', width=1, pad = 2)
    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(2)

    # change tick spacing
    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=2))
    ax.set_xlim(-1,)
    #ax.yaxis.set_major_locator(ticker.MultipleLocator(base=d_movern))

    # remove first tick from the x axis
    #ax.xaxis.set_major_locator(MaxNLocator(prune='lower'))

    #set y axis lims
    #end_movern = end_movern = start_movern+d_movern*(n_movern-1)
    #ax.set_ylim(start_movern-d_movern,1.3)

    # change y axis to the moverns tested
    # end_movern = end_movern = start_movern+d_movern*(n_movern-1)
    # print end_movern
    # ax.yaxis.set_ticks(np.arange(start_movern, end_movern, d_movern))


    newFilename = summary_directory+fname_prefix+"_"

    if Chi_all:
        newFilename = newFilename+"A"
    if Chi_bootstrap:
        newFilename = newFilename+"B"
    if Chi_disorder:
        newFilename = newFilename+"D"
    if SA_raw:
        newFilename = newFilename+"S"
    if SA_channels:
        newFilename = newFilename+"C"
    if SA_segmented:
        newFilename = newFilename+"G"

    newFilename = newFilename+"_movern_summary."+FigFormat


    plt.savefig(newFilename,format=FigFormat,dpi=300)
    ax.cla()
    plt.close(fig)

def MakeMOverNPlotOneMethod(DataDirectory, fname_prefix, basin_list=[], start_movern=0.2, d_movern=0.1, n_movern=7,
                            size_format='ESURF', FigFormat='png', movern_method='chi_points'):
        """
        This function makes a summary plot of the best fit m/n, you choose which method
        you want to plot.

        Args:
            DataDirectory (str): the data directory with the m/n csv files
            fname_prefix (str): The prefix for the m/n csv files
            basin_list: list of basin keys to analyse, default = [] (all basins)
            start_movern (float): the starting m/n value. Default is 0.2
            d_movern (float): the increment between the m/n values. Default is 0.1
            n_movern (float): the number of m/n values analysed. Default is 7.
            size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
            FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
            movern_method (str): the method you want to plot. Can be 'chi_all', 'chi_points', 'chi_disorder', SA_raw', or 'SA_segments'. Default 'chi_points'
        Returns:
            Makes a summary plot

        Author: FJC edit SMM 15/11/2019
        """
        # check if a directory exists for the summary plots. If not then make it.
        summary_directory = DataDirectory+'summary_plots/'
        if not os.path.isdir(summary_directory):
            os.makedirs(summary_directory)

        from matplotlib.ticker import FuncFormatter, MaxNLocator
        # Set up fonts for plots
        label_size = 10
        rcParams['font.family'] = 'sans-serif'
        rcParams['font.sans-serif'] = ['Liberation Sans']
        rcParams['font.size'] = label_size

        # make a figure
        fig = makefigure(size_format)


        gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.95,top=0.95)

        ax = fig.add_subplot(gs[5:100,10:95])

        # read in the summary csv
        df = Helper.ReadMOverNSummaryCSV(summary_directory,fname_prefix)
        print (df)

        if basin_list != []:
            basin_keys = basin_list
        else:
            # get the basin keys
            basin_keys = df['basin_key'].tolist()
            print (basin_keys)

        df = df[df['basin_key'].isin(basin_keys)]

        # plot the full chi data
        if movern_method=='chi_all':
            full_chi_keys = df['basin_key'].values
            full_chi_keys = full_chi_keys.astype(float)-0.2
            ax.scatter(full_chi_keys, df['Chi_MLE_full'],marker='o', edgecolors='k', lw=0.5, facecolors='#e34a33', s=15, zorder=200, label='Chi all data')

        elif movern_method=='chi_points':
            # plot the points data
            median_movern = df['Chi_MLE_points'].values
            points_max_err = df['Chi_MLE_points_max'].values
            points_max_err = points_max_err-median_movern
            points_min_err = df['Chi_MLE_points_min'].values
            points_min_err = median_movern-points_min_err
            errors = np.array(zip(points_min_err, points_max_err)).T

            points_chi_keys = df['basin_key'].values # -0.1 I removed that to fix the movern plots - Boris
            ax.errorbar(points_chi_keys, df['Chi_MLE_points'], s=15, marker='o', xerr=None, yerr=errors, ecolor='#fdbb84', fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(points_chi_keys, df['Chi_MLE_points'], s=15, c='#fdbb84', marker='o', edgecolors='k', lw=0.5,facecolors='#fdbb84', label='Chi bootstrap',zorder=200)

            # plot the SA data
        elif movern_method=='SA_raw':
                SA_keys = df['basin_key'].values
                SA_sterr = df['SA_raw_sterr'].values
                ax.errorbar(SA_keys, df['SA_raw'], yerr=SA_sterr, c='#2b8cbe', elinewidth=1, fmt='none',label='_nolegend_')
                ax.scatter(SA_keys, df['SA_raw'], s=15, c='#2b8cbe', edgecolors='k',lw=0.5, label='S-A all data', zorder=100)

                
        # plot the chi disorder data if you want it
        elif movern_method=='chi_disorder':
            median_movern = df['Chi_disorder'].values
            points_max_err = df['Chi_disorder_max'].values
            points_max_err = points_max_err.astype(float)-median_movern.astype(float)
            points_min_err = df['Chi_disorder_min'].values
            points_min_err = median_movern.astype(float)-points_min_err.astype(float)
            errors = np.array(list(zip(points_min_err, points_max_err))).T

            disorder_chi_keys = df['basin_key'].values
            disorder_chi_keys = disorder_chi_keys.astype(float)-0.3
            ax.errorbar(disorder_chi_keys, df['Chi_disorder'], s=15, marker='o', xerr=None, yerr=errors, ecolor='#F06292', fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(disorder_chi_keys, df['Chi_disorder'],marker='o', edgecolors='k', lw=0.5, facecolors='#F06292', s=15, zorder=100, label='Chi disorder')                
                
        else:
            # plot the segmented SA data
            median_movern = df['SA_segments'].values
            points_max_err = df['SA_segments_max'].values
            points_max_err = points_max_err-median_movern
            points_min_err = df['SA_segments_min'].values
            points_min_err = median_movern-points_min_err
            errors = np.array(zip(points_min_err, points_max_err)).T

            SA_segment_keys = df['basin_key'].values
            SA_segment_keys=SA_segment_keys.astype(float)+0.2
            ax.errorbar(SA_segment_keys, df['SA_segments'], s=15, marker='o', facecolors='#a6bddb', xerr=None, yerr=errors, edgecolors='#a6bddb', fmt='none', elinewidth=1, linestyle = ":", ecolor='#a6bddb',label='_nolegend_')
            ax.scatter(SA_segment_keys, df['SA_segments'], s=15, marker='o', facecolors='#a6bddb', edgecolors='k', lw=0.5, label='Segmented S-A', zorder=100)

        # set the axis labels
        ax.set_xlabel('Basin key')
        ax.set_ylabel('Best fit '+r'$\theta$')

        # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
        ax.tick_params(axis='both', width=1, pad = 2)
        for tick in ax.xaxis.get_major_ticks():
            tick.set_pad(2)

        # change tick spacing
        ax.xaxis.set_major_locator(ticker.MultipleLocator(base=1))
        ax.set_xlim(-0.5,df['basin_key'].max()+0.5)

        newFilename = summary_directory+fname_prefix+"_movern_"+movern_method+"."+FigFormat
        plt.savefig(newFilename,format=FigFormat,dpi=300)
        ax.cla()
        plt.close(fig)

        
        
def MakeMOverNDisorderDistancePlot(DataDirectory, fname_prefix, basin_list_list=[], start_movern=0.2, d_movern=0.1, n_movern=7,
                            size_format='ESURF', FigFormat='png', show_legend=True,parallel=False, group_names=[]): 
    """
    This function makes a summary plot of the best fit m/n, you choose which method
    you want to plot.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: list of basin keys to analyse, default = [] (all basins)
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        movern_method (str): the method you want to plot. Can be 'chi_all', 'chi_points', 'chi_disorder', SA_raw', or 'SA_segments'. Default 'chi_points'
    Returns:
        Makes a summary plot

    Author: SMM 15/11/2019
    """
    # check if a directory exists for the summary plots. If not then make it.
    summary_directory = DataDirectory+'summary_plots/'
    if not os.path.isdir(summary_directory):
        os.makedirs(summary_directory)

    from matplotlib.ticker import FuncFormatter, MaxNLocator
    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)


    print("SHOW LEGEND", show_legend)

    if show_legend:
        gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.75,top=0.9)
    else:
        gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.95,top=0.95)

    ax = fig.add_subplot(gs[5:100,10:95])

    # read in the summary csv
    df = Helper.ReadMOverNSummaryCSV(summary_directory,fname_prefix)
    
    # you also need the basin info
    df_basin_info = Helper.ReadBasinInfoCSV(DataDirectory,fname_prefix)
    
    #print("The data frame is")
    #print(df)
    
    #print("The basin info df is:")
    #print(df_basin_info)
    
    # Now merge the two dataframes
    df_new = pd.merge(df,df_basin_info,on='basin_key')
    df = df_new
    
    #print("The new dataframe is")
    #print(df)

    if basin_list_list != []:
        basin_keys_list = basin_list_list
    else:
        # get the basin keys
        basin_keys_list = []
        basin_keys = df['basin_key'].tolist()
        basin_keys_list.append(basin_keys)
        print("You didn't select basin keys. I am picking all of them:")
        print (basin_keys_list)

    # This just makes a colour list for plotting
    colour_list = []
    colour_list.append('#e41a1c')
    colour_list.append('#377eb8')
    colour_list.append('#4daf4a')
    colour_list.append('#984ea3')
    colour_list.append('#ff7f00')
    colour_list.append('#ffff33')
    colour_list.append('#a65628')
    colour_list.append('#f781bf')
    colour_list.append('#999999')
    
    tab20_cm = cm.get_cmap('tab20')
    
        
    # Loop through the basin key list
    colour_index = 0;
    for basin_keys in basin_list_list:
        
        
        print("\n\n\nI am selecting the following basins")
        print(basin_keys)
        print("The colour indexs is: "+str(colour_index))
        
        this_df = df[df['basin_key'].isin(basin_keys)]

        # plot the chi disorder data if you want it. This will fail if the 
        
        if 'Chi_disorder' in this_df:
            colour_index_mod = colour_index % 9
            if group_names == []:
                label_name = "Group "+str(colour_index)
            else:
                label_name = group_names[colour_index]
            median_movern = this_df['Chi_disorder'].values
            points_max_err = this_df['Chi_disorder_max'].values
            points_max_err = points_max_err.astype(float)-median_movern.astype(float)
            points_min_err = this_df['Chi_disorder_min'].values
            points_min_err = median_movern.astype(float)-points_min_err.astype(float)
            errors = np.array(list(zip(points_min_err, points_max_err))).T
            lat_vals =  this_df['latitude'].values
            long_vals =  this_df['longitude'].values

            disorder_chi_keys = this_df['basin_key'].values
            disorder_chi_keys = disorder_chi_keys.astype(float)-0.3
            ax.errorbar(long_vals, this_df['Chi_disorder'], s=15, marker='o', xerr=None, yerr=errors, ecolor=colour_list[colour_index_mod], fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(long_vals, this_df['Chi_disorder'],marker='o', edgecolors='k', lw=0.5, facecolors=colour_list[colour_index_mod], s=15, zorder=100, label=label_name)
        colour_index = colour_index+1



    # set the axis labels
    ax.set_xlabel('Longitude (decimal degrees)')
    ax.set_ylabel('Best fit '+r'$\theta$')

    if show_legend:
        print ("ADDING THE LEGEND")
        # sort both labels and handles by labels
        handles, labels = ax.get_legend_handles_labels()
        labels, handles = list(zip(*sorted(list(zip(labels, handles)), key=lambda t: t[0])))
        # add the legend
        ax.legend(handles, labels,fontsize=8, bbox_to_anchor=(1.0,0.7),bbox_transform=plt.gcf().transFigure)

    # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
    ax.tick_params(axis='both', width=1, pad = 2)
    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(2)

    # change tick spacing
    #ax.xaxis.set_major_locator(ticker.MultipleLocator(base=2))
    #ax.set_xlim(-1,)
    #ax.yaxis.set_major_locator(ticker.MultipleLocator(base=d_movern))

    # remove first tick from the x axis
    #ax.xaxis.set_major_locator(MaxNLocator(prune='lower'))

    #set y axis lims
    #end_movern = end_movern = start_movern+d_movern*(n_movern-1)
    #ax.set_ylim(start_movern-d_movern,1.3)

    # change y axis to the moverns tested
    # end_movern = end_movern = start_movern+d_movern*(n_movern-1)
    # print end_movern
    # ax.yaxis.set_ticks(np.arange(start_movern, end_movern, d_movern))


    newFilename = summary_directory+fname_prefix+"_concavity_dist."+FigFormat
    plt.savefig(newFilename,format=FigFormat,dpi=300)
    ax.cla()
    plt.close(fig)    
        
        
def MakeMOverNSummaryHistogram(DataDirectory, fname_prefix, basin_list=[], size_format='ESURF', FigFormat='png', start_movern=0.1, n_movern=7, d_movern=0.1, mn_method = "Chi", show_legend=True, parallel=False, Chi_disorder=False):
    """
    This function makes a stacked histogram showing the distribution of concavity (~m/n) values for each method

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        start_movern (float): the starting m/n value. Default is 0.2
        d_movern (float): the increment between the m/n values. Default is 0.1
        n_movern (float): the number of m/n values analysed. Default is 7.
        mn_method (str): the method of calculating m/n, either "Chi", or "SA". Default "Chi"
        show_legend (bool): if true, add a legend to the plot (default=True)
        Chi_disorder(bool): if true, add the chi disorder analysis

    Returns:
        Histogram of the m/n values

    Author: FJC
    """

    # check if a directory exists for the summary plots. If not then make it.
    summary_directory = DataDirectory+'summary_plots/'
    if not os.path.isdir(summary_directory):
        os.makedirs(summary_directory)

    from matplotlib.ticker import FuncFormatter, MaxNLocator
    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)
    figsize = fig.get_size_inches()

    # if show_legend:
    #     gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.75,top=0.9)
    # else:
    #     gs = plt.GridSpec(100,100,bottom=0.1,left=0.05,right=0.95,top=0.95)
    #
    # ax = fig.add_subplot(gs[5:100,10:95])

    # read in the summary csv
    if not parallel:
      df = Helper.ReadMOverNSummaryCSV(summary_directory,fname_prefix)
    else:
      print ("MakeMOverNSummaryHistogram not parallelised yet")
      return
    
    Chi_disorder_only = False
    if Chi_disorder: 
        if 'Chi_MLE_points' not in df:
            print("I didn't find a bootstrap data line. Switching to disorder only")
            Chi_disorder_only= True

    # get the basin keys
    basin_keys = df['basin_key'].tolist()
    print (basin_keys)
    if Chi_disorder_only:
        columns = ['Chi_disorder', 'SA_raw', 'SA_segments']
        these_labels = ['Chi disorder', 'S-A all data', 'Segmented S-A']
        colours = ['#F06292', '#2b8cbe', '#a6bddb']
    else:
        if Chi_disorder:
            columns = ['Chi_MLE_full', 'Chi_MLE_points', 'Chi_disorder', 'SA_raw', 'SA_segments']
            these_labels = ['Chi all data', 'Chi bootstrap', 'Chi disorder', 'S-A all data', 'Segmented S-A']
            colours = ['#e34a33', '#fdbb84', '#F06292', '#2b8cbe', '#a6bddb']

        else:
            columns = ['Chi_MLE_full', 'Chi_MLE_points', 'SA_raw', 'SA_segments']
            these_labels = ['Chi all data', 'Chi bootstrap', 'S-A all data', 'Segmented S-A']
            colours = ['#e34a33', '#fdbb84', '#2b8cbe', '#a6bddb']
            
    x_spacing = 0.05
    fig, ax = joyplot.joyplot(df, figsize=figsize, column=columns, label_strings=these_labels, x_range=[0,1],grid="x",color=colours,x_title='Best fit '+r'$\theta$' +' distribution',x_spacing=x_spacing)
    #plt.xlabel('Best fit $m/n$')
    
    # change tick spacing
    x_loc = np.arange(0,1.01,0.05)
    print("x_loc is:")
    print(x_loc)
    labels = ["0","","","","0.2","","","","0.4","","","","0.6","","","","0.8","","","","1"]
    plt.xticks(x_loc, labels, rotation='vertical')
    

    newFilename = summary_directory+fname_prefix+"_movern_hist."+FigFormat

    plt.savefig(newFilename,format=FigFormat,dpi=300)
    plt.close(fig)

def PlotMOverNByBasin(DataDirectory, fname_prefix, basin_list = [], size_format='ESURF', FigFormat='png', colour_points=False, column_header="Lithology"):
    """
    This function makes a plot of the best-fit m/n value calculated using the chi Monte Carlo points analysis
    for each basin. The points are coloured by another value which is specified by the user (e.g. lithology).
    The column header must be specified by the user.

    This is a work in progress. I haven't tested it yet and wrote it on the plane.
    I need to think about what is the best way to read in the lithology - will we have a string
    for each lithology? Then we want to map each string to a different colour.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        basin_list: a list of the basins to analyse
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        colour_points (bool): boolean to decide whether to colour the points by another value, e.g. lithology. Default = False
        column_header (str): if colour_points=True, then use this to tell us what column header in the csv you want to use

    Returns:
        m/n plot by basin_list

    Author: FJC
    """

    # check if a directory exists for the summary plots. If not then make it.
    summary_directory = DataDirectory+'summary_plots/'
    if not os.path.isdir(summary_directory):
        os.makedirs(summary_directory)

    # read in the summary csv file
    df = Helper.ReadMOverNSummaryCSV(DataDirectory,fname_prefix)

    # get the basin keys
    if basin_list == []:
        basin_list = df['basin_keys'].tolist()

    # mask the dataframe for the required basin keys
    df = df[df['basin_key'].isin(basin_list)]

    columns = df.columns()

    # set up the figure
    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)


    gs = plt.GridSpec(100,100,bottom=0.1,left=0.05,right=0.95,top=0.95)
    ax = fig.add_subplot(gs[5:100,10:95])

    # check if the user wants to colour the points by a specific columns
    if colour_points:
        if not column_header.isin(columns):
            print ("The column header name does not exist! Please check your dataframe")
        else:
            values_to_colour = df[column_header].tolist()
            this_cmap = plt.cm.Set2
            ax.errorbar(basin_list, df['Chi_MLE_points'], s=20, marker='o', xerr=None, yerr=errors, ecolor=values_to_colour, cmap=this_cmap, fmt='none', elinewidth=1,label='_nolegend_')
            ax.scatter(basin_list, df['Chi_MLE_points'], s=20, c=values_to_colour, cmap=this_cmap, marker='o', edgecolors=None, lw=0.5, label='Chi bootstrap',zorder=200)
    else:
        ax.errorbar(basin_list, df['Chi_MLE_points'], s=20, marker='o', xerr=None, yerr=errors, ecolor='#fdbb84', fmt='none', elinewidth=1,label='_nolegend_')
        ax.scatter(basin_list, df['Chi_MLE_points'], s=20, c='#fdbb84', marker='o', edgecolors='k', lw=0.5,facecolors='#fdbb84', label='Chi bootstrap',zorder=200)

    # set the axis labels
    ax.set_xlabel('Basin key')
    ax.set_ylabel('Best fit '+r'$\theta$')

    # This gets all the ticks, and pads them away from the axis so that the corners don't overlap
    ax.tick_params(axis='both', width=1, pad = 2)
    for tick in ax.xaxis.get_major_ticks():
        tick.set_pad(2)

    # change tick spacing
    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=1))

    newFilename = summary_directory+fname_prefix+"_movern_summary."+FigFormat
    if SA_channels:
        newFilename = summary_directory+fname_prefix+"_movern_summary_with_SA_tribs."+FigFormat
    plt.savefig(newFilename,format=FigFormat,dpi=300)
    ax.cla()
    plt.close(fig)



# def MakeBasinJoyplot(DataDirectory, fname_prefix, basin_list=[], size_format='ESURF', FigFormat='png'):
#     """
#     This function makes a joyplot showing the m/n value for a list of basins. m/n value
#     is calculated using the chi points method.
#
#     Args:
#         DataDirectory (str): the data directory with the m/n csv files
#         fname_prefix (str): The prefix for the m/n csv files
#         basin_list: a list of the basins to analyse
#         size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
#         FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
#
#     Returns:
#         Joyplot of the m/n values for each basin
#
#     Author: FJC
#     """
#
#     # check if a directory exists for the summary plots. If not then make it.
#     summary_directory = DataDirectory+'summary_plots/'
#     if not os.path.isdir(summary_directory):
#         os.makedirs(summary_directory)
#
#     from matplotlib.ticker import FuncFormatter, MaxNLocator
#     # Set up fonts for plots
#     label_size = 10
#     rcParams['font.family'] = 'sans-serif'
#     rcParams['font.sans-serif'] = ['Liberation Sans']
#     rcParams['font.size'] = label_size
#
#     # make a figure
#     if size_format == "geomorphology":
#         #fig = plt.figure(1, facecolor='white',figsize=(6.25,3.5))
#         figsize=(6.25,3.5)
#         #l_pad = -40
#     elif size_format == "big":
#         #fig = plt.figure(1, facecolor='white',figsize=(16,9))
#         figsize=(16,9)
#         #l_pad = -50
#     else:
#         #fig = plt.figure(1, facecolor='white',figsize=(4.92126,3.2))
#         figsize=(4.92126,3.2)
#         #l_pad = -35
#
#     # if show_legend:
#     #     gs = plt.GridSpec(100,100,bottom=0.15,left=0.05,right=0.75,top=0.9)
#     # else:
#     #     gs = plt.GridSpec(100,100,bottom=0.1,left=0.05,right=0.95,top=0.95)
#     #
#     # ax = fig.add_subplot(gs[5:100,10:95])
#
#     # read in the summary csv
#     df = Helper.ReadMOverNSummaryCSV(summary_directory,fname_prefix)
#     print df
#
#     # get the basin keys
#     if basin_list == []:
#         print "You didn't give me a list of basins so I'll analyse all of them"
#         basin_list = df['basin_key'].tolist()
#
#     df = df[df['basin_key'].isin(basin_list)]
#
#     columns = ['basin_key']
#     #these_labels = ['Chi all data', 'Chi Monte Carlo', 'S-A all data', 'Segmented S-A']
#     #colours = ['#e34a33', '#fdbb84', '#2b8cbe', '#a6bddb']
#     x_spacing = 0.2
#     fig, ax = joyplot.joyplot(df, figsize=figsize, column=columns, x_range=[0,1],grid="x",x_title='Best fit $m/n$ distribution',x_spacing=x_spacing)
#     #plt.xlabel('Best fit $m/n$')
#
#     newFilename = summary_directory+fname_prefix+"_basin_joyplots."+FigFormat
#
#     plt.savefig(newFilename,format=FigFormat,dpi=300)
#     plt.close(fig)

#=============================================================================
# RASTER PLOTTING FUNCTIONS
# Functions that interface with LSDMapFigure to plot the m/n analysis with
# spatial data.
#=============================================================================
def MakeRasterPlotsBasins(DataDirectory, fname_prefix, size_format='ESURF', FigFormat='png', parallel=False):
    """
    This function makes a shaded relief plot of the DEM with the basins coloured
    by the basin ID.

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.

    Returns:
        Shaded relief plot with the basins coloured by basin ID

    Author: FJC
    """
    # check if a directory exists for the chi plots. If not then make it.
    raster_directory = DataDirectory+'raster_plots/'
    if not os.path.isdir(raster_directory):
        os.makedirs(raster_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # set figure sizes based on format
    # make a figure
    fig = makefigure(size_format)
    fig_width_inches = fig.get_size_inches()[0]

    # get the basin IDs to make a discrete colourmap for each ID
    if not parallel:
        BasinInfoDF = Helper.ReadBasinInfoCSV(DataDirectory, fname_prefix)
    else:
        BasinInfoDF = Helper.AppendBasinInfoCSVs(DataDirectory,fname_prefix)

    print (BasinInfoDF)

    basin_keys = list(BasinInfoDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]

    basin_junctions = list(BasinInfoDF['outlet_junction'])
    basin_junctions = [int(x) for x in basin_junctions]

    print ('Basin keys are: ')
    print (basin_keys)

    # get a discrete colormap
    cmap = plt.cm.jet

    # going to make the basin plots - need to have bil extensions.
    print("I'm going to make the basin plots. Your topographic data must be in ENVI bil format or I'll break!!")

    # get the rasters
    raster_ext = '.bil'
    BackgroundRasterName = fname_prefix+raster_ext
    HillshadeName = fname_prefix+'_hs'+raster_ext
    BasinsName = fname_prefix+'_AllBasins'+raster_ext

    # create the map figure
    MF = MapFigure(HillshadeName, DataDirectory,coord_type="UTM_km", colourbar_location='None')

    # add the basins drape
    MF.add_basin_plot(BasinsName,fname_prefix,DataDirectory,
                      use_keys_not_junctions = True, show_colourbar = True,
                      discrete_cmap=True, n_colours=len(basin_keys), colorbarlabel = "Basin ID",
                      colourmap = cmap, adjust_text = False, parallel=parallel)

    # add the basin outlines ### need to parallelise
    if not parallel:
      Basins = LSDP.GetBasinOutlines(DataDirectory, BasinsName)
    else:
      Basins = LSDP.GetMultipleBasinOutlines(DataDirectory)

    MF.plot_polygon_outlines(Basins, linewidth=0.8)

    # add the channel network
    if not parallel:
        ChannelDF = Helper.ReadChiDataMapCSV(DataDirectory,fname_prefix)
    else:
        ChannelDF = Helper.AppendChiDataMapCSVs(DataDirectory,fname_prefix)
    ChannelPoints = LSDP.LSDMap_PointData(ChannelDF, data_type = "pandas", PANDEX = True)
    MF.add_point_data(ChannelPoints,show_colourbar="False", scale_points=True, column_for_scaling='drainage_area',alpha=0.5,zorder=100)

    # add the basin labelling
    label_dict = dict(zip(basin_junctions,basin_keys))

    if not parallel:
      Points = LSDP.GetPointWithinBasins(DataDirectory, BasinsName)
    else:
      Points = LSDP.GetPointsWithinMultipleBasins(DataDirectory, BasinsName)

    MF.add_text_annotation_from_shapely_points(Points, text_colour='k', label_dict=label_dict, zorder=200)

    # Save the figure
    ImageName = raster_directory+fname_prefix+'_basin_keys.'+FigFormat
    MF.save_fig(fig_width_inches = fig_width_inches, FigFileName = ImageName, FigFormat=FigFormat, Fig_dpi = 300)

def MakeRasterPlotsMOverN(DataDirectory, fname_prefix, start_movern=0.2, n_movern=7, d_movern=0.1, movern_method='Chi_full', size_format='ESURF', FigFormat='png',lith = False, parallel=False):
    """
    This function makes a shaded relief plot of the DEM with the basins coloured
    by the best fit m/n

    Args:
        DataDirectory (str): the data directory with the m/n csv files
        fname_prefix (str): The prefix for the m/n csv files
        start_movern (int): The starting m/n, default = 0.2
        n_movern (int): The number of m/n values tested, default = 7.
        movern_method: the method of estimating m over n. Options are full chi "Chi_full", points "Chi_points", or slope-area "SA". Default is "Chi_full".
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        lith (bool): the lithologic information if available. You need to follow the lithologic documentation to generate the right files. (This last does not exists yet).

    Returns:
        Shaded relief plot with the basins coloured by best fit m/n

    Author: FJC
    """
    # check if a directory exists for the chi plots. If not then make it.
    raster_directory = DataDirectory+'raster_plots/'
    if not os.path.isdir(raster_directory):
        os.makedirs(raster_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)
    fig_width_inches = fig.get_size_inches()[0]

    # get the basin IDs to make a discrete colourmap for each ID
    if not parallel:
        BasinDF = Helper.ReadBasinStatsCSV(DataDirectory,fname_prefix)
        # get the basin IDs to make a discrete colourmap for each ID
        BasinInfoDF = Helper.ReadBasinInfoCSV(DataDirectory, fname_prefix)
    else:
        BasinDF = Helper.AppendBasinCSVs(DataDirectory,fname_prefix)
        BasinInfoDF = Helper.AppendBasinInfoCSVs(DataDirectory,fname_prefix)

    basin_keys = list(BasinInfoDF['basin_key'])
    basin_keys = [int(x) for x in basin_keys]

    basin_junctions = list(BasinInfoDF['outlet_junction'])
    basin_junctions = [int(x) for x in basin_junctions]

    print ('Basin keys are: ')
    print (basin_keys)

    labeldict = {}
    # get the best fit m/n for each basin
    if movern_method == "Chi_full":
        print ('Making a plot for the full chi method')
        Outlier_counter, removed_sources_dict, best_fit_movern_dict, MLEs_dict = CheckMLEOutliers(DataDirectory, fname_prefix, basin_list=basin_keys, start_movern=start_movern, d_movern=d_movern, n_movern=n_movern, parallel=parallel)
        #MOverNDict = SimpleMaxMLECheck(BasinDF)
        m_over_ns = [round(i[0],2) for i in best_fit_movern_dict.values()]
        print ('You tested these m_over_ns')
        print (m_over_ns)
        MOverNDict = dict(zip(basin_keys,m_over_ns))
        label_list = [str(i)+": "+str(j) for i,j in zip(basin_keys,m_over_ns)]
        labeldict = dict(zip(basin_junctions,label_list))
        ImageName = raster_directory+fname_prefix+'_basins_movern_chi_full.'+FigFormat
        title = "Chi analysis (all data)"
    elif movern_method == "Chi_points":
        print ('Making a plot for the chi points method')
        if not parallel:
            PointsChiBasinDF = Helper.ReadMCPointsCSV(DataDirectory,fname_prefix)
        else:
            PointsChiBasinDF = Helper.AppendBasinPointCSVs(DataDirectory,fname_prefix)

        PointsDF = GetMOverNRangeMCPoints(PointsChiBasinDF,start_movern,d_movern,n_movern)
        moverns = PointsDF['Median_MOverNs'].tolist()
        MOverNDict = dict(zip(basin_keys,moverns))
        # labelling the basins
        moverns = [round(i,2) for i in moverns]
        label_list = [str(i)+": "+str(j) for i,j in zip(basin_keys,moverns)]
        labeldict = dict(zip(basin_junctions,label_list))
        ImageName = raster_directory+fname_prefix+'_basins_movern_chi_points.'+FigFormat
        title = "Chi bootstrap analysis"
    elif movern_method == "SA":
        SlopeAreaDF = SA.LinearRegressionRawData(DataDirectory,fname_prefix,parallel=parallel)
        moverns = SlopeAreaDF['regression_slope'].tolist()
        MOverNDict = dict(zip(basin_keys,moverns))
        # labelling the basins
        moverns = [round(i,2) for i in moverns]
        label_list = [str(i)+": "+str(j) for i,j in zip(basin_keys,moverns)]
        labeldict = dict(zip(basin_junctions,label_list))
        ImageName = raster_directory+fname_prefix+'_basins_movern_SA.'+FigFormat
        title = "Slope-area analysis"
    elif movern_method == "Chi_disorder":
        #DisorderDF = Helper.ReadDisorderCSV(DataDirectory, fname_prefix)
        if not parallel:
            DisorderDF = Helper.ReadDisorderUncertCSV(DataDirectory, fname_prefix)
        else:
            DisorderDF = Helper.AppendDisorderCSV(DataDirectory, fname_prefix)
        m_over_ns = DisorderDF['median'].tolist()
        MOverNDict = dict(zip(basin_keys,m_over_ns))
        moverns = [round(i,2) for i in m_over_ns]
        label_list = [str(i)+": "+str(j) for i,j in zip(basin_keys,moverns)]
        labeldict = dict(zip(basin_junctions,label_list))
        ImageName = raster_directory+fname_prefix+'_basins_movern_chi_disorder.'+FigFormat
        title = "Chi disorder analysis"

    else:
        print ("You didn't select an appropriate movern method. Please choose either 'Chi_full', 'Chi_points, or 'SA'.")
        sys.exit()

    # get moverns for cbar plotting. We always want a spacing of 0.1.
    min_max_str = ['min', 'max']
    end_movern = float(start_movern)+float(d_movern)*(float(n_movern)-1)
    all_moverns = np.linspace(start_movern,end_movern,n_movern)
    print ("END MOVERN:")
    print (end_movern)
    min_max_moverns = [start_movern, end_movern]
    cbar_dict = dict(zip(min_max_str,min_max_moverns))

    # work out how many colours we need for the colormap
    n_colours = len(all_moverns)
    print ("N colours is: "+str(n_colours))

    # get a discrete colormap
    mn_cmap = plt.cm.Reds

    # going to make the basin plots - need to have bil extensions.
    print("I'm going to make the basin m/n plots. Your topographic data must be in ENVI bil format or I'll break!!")

    # get the rasters
    raster_ext = '.bil'
    BackgroundRasterName = fname_prefix+raster_ext
    HillshadeName = fname_prefix+'_hs'+raster_ext
    BasinsName = fname_prefix+'_AllBasins'+raster_ext

    print ("THE TITLE IS:"+title)
    # create the map figure
    MF = MapFigure(HillshadeName, DataDirectory,coord_type="UTM_km", plot_title=title)
    # add the basins drape
    if lith:

        LithoMap = fname_prefix+"_LITHRAST"+raster_ext
        df_litho = pd.read_csv(DataDirectory+fname_prefix+"_lithokey.csv")
        MF.add_drape_image(LithoMap,DataDirectory,colourmap = plt.cm.jet,
                                alpha=0.4,
                                show_colourbar = False,
                                colorbarlabel = "Colourbar", discrete_cmap=True, n_colours=df_litho.shape[0],
                                norm = "None",
                                colour_min_max = [],
                                modify_raster_values=False,
                                old_values=[], new_values=[], cbar_type=int,
                                NFF_opti = False, custom_min_max = [])

    else:
        MF.add_basin_plot(BasinsName,fname_prefix,DataDirectory, value_dict = MOverNDict,
                          use_keys_not_junctions = True, show_colourbar = False,
                          discrete_cmap=True, n_colours=n_colours, colorbarlabel = "$m/n$",
                          colourmap = mn_cmap, adjust_text = False, cbar_dict=cbar_dict, parallel=parallel)

    # add the basin outlines
    if not parallel:
      Basins = LSDP.GetBasinOutlines(DataDirectory, BasinsName)
    else:
      Basins = LSDP.GetMultipleBasinOutlines(DataDirectory)

    MF.plot_polygon_outlines(Basins, linewidth=0.8)

    # add the channel network
    if not parallel:
        ChannelDF = Helper.ReadChiDataMapCSV(DataDirectory,fname_prefix)
    else:
        ChannelDF = Helper.AppendChiDataMapCSVs(DataDirectory,fname_prefix)
    ChannelPoints = LSDP.LSDMap_PointData(ChannelDF, data_type = "pandas", PANDEX = True)
    MF.add_point_data(ChannelPoints,show_colourbar="False", scale_points=True, column_for_scaling='drainage_area',alpha=0.1,zorder=100)

    # add the basin labelling
    if not parallel:
      Points = LSDP.GetPointWithinBasins(DataDirectory, BasinsName)
    else:
      Points = LSDP.GetPointsWithinMultipleBasins(DataDirectory, BasinsName)

    print ("Adding labels, the label dict is:", labeldict)
    MF.add_text_annotation_from_shapely_points_v2(Points, text_colour='k', label_dict=labeldict, zorder=200)

    if(lith):
        ImageName = ImageName[0:-(len(FigFormat)+1)]+"_lith."+FigFormat # adding a particle to the file
    MF.save_fig(fig_width_inches = fig_width_inches, FigFileName = ImageName, FigFormat=FigFormat, Fig_dpi = 300) # Save the figure


#=============================================================================
# UNCERTAINTY
# Functions that make plots of uncertainties on the m/n analysis and comparison
# between the different methods
#=============================================================================

def PlotMOverNDicts(DataDirectory,fname_prefix,SA_based_dict,Chi_based_dict, FigFormat = "png", size_format = "ESURF"):
    """
    This plots the estimates of m over n for basins contained in the SA and Chi based dicts

    Args:
        SA_based_dict (dict): A dictionary where the key is the basin and the values are lists of the best fit m/n for S-A plots
        Chi_based_dict (dict):  A dictionary where the key is the basin and the values are lists of the best fit m/n after removing outliers

    Author: SMM
    """

    # We need to deal with the colours. Each colour will indicate a different
    # Data point


    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
    ax = fig.add_subplot(gs[5:100,10:95])
    ax.grid(zorder=-100)

    # Loop through the basins adding data
    for basin in SA_based_dict:
        this_SA_list = SA_based_dict[basin]
        this_chi_list = Chi_based_dict[basin]

        this_SA_list = np.multiply(this_SA_list,-1)
        full_list = np.concatenate((this_SA_list,this_chi_list))
        basin_list = [basin]*len(full_list)

        colours = []
        this_num = -2
        for element in this_SA_list:
            colours.append(this_num)
            this_num = this_num+0.25

        this_num = 1
        for element in this_chi_list:
            colours.append(this_num)
            this_num = this_num+0.25


        #print("Hey there pal, let me print out the two lists for you")
        #print(this_SA_list)
        #print(this_chi_list)

        #print("These are the colour and basin lists")
        #print(full_list)
        #print(basin_list)


        #colours = range(len(full_list))


        ax.scatter(basin_list,full_list,c=colours,edgecolors='k',s=15,cmap = plt.cm.PuOr,zorder = 100)



    # set the axes labels
    ax.set_xlabel('basin')
    ax.set_ylabel('$m/n$')
    # Save the figure
    ImageName = DataDirectory+fname_prefix+'_MOverNbasins.'+FigFormat
    plt.savefig(ImageName, format=FigFormat, dpi=300)
    fig.clf()

def plot_MCMC_analysis(DataDirectory,fname_prefix,basin_list=[],FigFormat='png',size_format='ESURF', parallel=False):
    """
    This function makes a plot of the MCMC uncertainty analysis for the MLE collinearity
    NOTE: haven't set this up for basins in parallel yet (FJC 19/10/17)

    Args:
        DataDirectory (str): the data directory
        fname_prefix (str): the DEM name without extension
        basin_list: list of basins to analyse. If none are passed then all basins are plotted.
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        parallel (bool): If true the data exists in multiple files for each basin and must be recombined (Default is False)

    Author: FJC
    """
    # check if a directory exists for the chi plots. If not then make it.
    MCMC_directory = DataDirectory+'MCMC_plots/'
    if not os.path.isdir(MCMC_directory):
        os.makedirs(MCMC_directory)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
    ax = fig.add_subplot(gs[5:100,10:95])

    # get the basin info
    if not parallel:
      basin_df = Helper.ReadBasinInfoCSV(DataDirectory,fname_prefix)
      basin_stats_df = Helper.ReadBasinStatsCSV(DataDirectory,fname_prefix)
    else:
      basin_df = Helper.AppendBasinInfoCSVs(DataDirectory,fname_prefix)
      basin_stats_df = Helper.AppendBasinStatsCSVs(DataDirectory,fname_prefix)

    basin_keys = basin_df['basin_key']

    MOverNDict = SimpleMaxMLECheck(basin_stats_df)

    if basin_list == []:
        print ("You didn't give me a basin list so I'm going to plot all of them!")
        basin_list = basin_keys

    for basin in basin_list:
        # read in the chain csv file
        chain_df = Helper.ReadChainCSV(DataDirectory,fname_prefix,int(basin))

        best_fit_movern = MOverNDict[int(basin)]
        print (("The best fit m/n is:", best_fit_movern))
        n_iterations = len(chain_df['i'])
        #print n_iterations
        best_fit_line = [best_fit_movern] * len(chain_df['i'])

        # find the accepted values
        chain_df['AcceptedMask'] = chain_df['NAccepted'].diff()
        masked_df = chain_df.where(chain_df['AcceptedMask'] == 1.0)
        #print chain_df

        # plot the parameter with number of iterations
        ax.plot(masked_df['i'], masked_df['movern_New'], c='0.5', lw=0.5, zorder=1)
        ax.plot(chain_df['i'], best_fit_line, 'k--', zorder=100)

        #set plot labels
        ax.set_xlabel('N iterations')
        ax.set_ylabel('$m/n$')
        ax.set_title('Basin '+str(basin))

        # save the figure
        ImageName = MCMC_directory+fname_prefix+'_MCMC_basin' +str(basin)+'.'+FigFormat
        plt.savefig(ImageName, format=FigFormat, dpi=300)
        ax.cla()

def PlotMCPointsUncertainty(DataDirectory,fname_prefix, basin_list=[0], FigFormat='png',size_format='ESURF', start_movern=0.2,d_movern=0.1,n_movern=7,parallel=False):
    """
    This function makes a plot showing the range in m/n calculated
    using the MC points analysis. Makes a separate plot for each basin.

    Args:
        DataDirectory (str): the data directory
        fname_prefix (str): the DEM name without extension
        basin_list: list of basins to be analysed. If empty then will analyse all of them.
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).

    Returns:
        plot of uncertainty in m/n

    Author:
        FJC
    """
    import matplotlib.patches as patches

    # check if a directory exists for the chi plots. If not then make it.
    points_directory = DataDirectory+'MC_points_plots/'
    if not os.path.isdir(points_directory):
        os.makedirs(points_directory)

    # get the basin info
    if not parallel:
        basin_df = Helper.ReadBasinInfoCSV(DataDirectory,fname_prefix)
    else:
        basin_df = Helper.AppendBasinInfoCSVs(DataDirectory,fname_prefix)

    basin_keys = basin_df['basin_key']

    if basin_list == []:
        print ("You didn't give me a basin list so I'm going to plot all of them!")
        basin_list = basin_keys

    # get the uncertainty df
    if not parallel:
        PointsChiBasinDF = Helper.ReadMCPointsCSV(DataDirectory,fname_prefix)
    else:
        PointsChiBasinDF = Helper.AppendBasinPointCSVs(DataDirectory,fname_prefix)

    PointsChiBasinDF = PointsChiBasinDF[PointsChiBasinDF['basin_key'].isin(basin_list)]

    UncertaintyDF = GetMOverNRangeMCPoints(PointsChiBasinDF,start_movern,d_movern,n_movern)

    # set up the plot
    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
    ax = fig.add_subplot(gs[5:100,10:95])

    if not parallel:
        BasinStatsDF = Helper.ReadBasinStatsCSV(DataDirectory,fname_prefix)
    else:
        BasinStatsDF = Helper.AppendBasinCSVs(DataDirectory,fname_prefix)
    # best fit moverns
    best_fit_moverns = SimpleMaxMLECheck(BasinStatsDF)

    for basin_key in basin_keys:
        ThisBasinDF = PointsChiBasinDF[PointsChiBasinDF['basin_key'] == basin_key]

        # get the m/ns tested
        end_movern = start_movern+d_movern*(n_movern-1)
        all_moverns = np.linspace(start_movern,end_movern,n_movern)

        # get the median m/ns for this basin
        Medians = (ThisBasinDF.filter(regex='median')).values.tolist()[-1]
        FirstQs = (ThisBasinDF.filter(regex='FQ')).values.tolist()[-1]
        ThirdQs = (ThisBasinDF.filter(regex='TQ')).values.tolist()[-1]

        # now get the threshold value
        ThisUncertaintyDF = UncertaintyDF[UncertaintyDF['basin_key'] == basin_key]
        print (ThisUncertaintyDF)

        #plot the median and quartiles
        ax.plot(all_moverns,Medians,c="k",lw=1,zorder=2)
        ax.plot(all_moverns,FirstQs,c="k", lw=0.5, ls="--",zorder=2)
        ax.plot(all_moverns,ThirdQs,c="k", lw=0.5, ls="--",zorder=2)
        ax.fill_between(all_moverns, FirstQs, ThirdQs, color="0.8",alpha=0.75,zorder=1)

        #add a line for the threshold
        threshold = ThisUncertaintyDF.iloc[0]['FirstQ_threshold']
        min_movern = ThisUncertaintyDF.iloc[0]['Min_MOverNs']
        max_movern = ThisUncertaintyDF.iloc[0]['Max_MOverNs']
        threshold_moverns = np.linspace(min_movern,max_movern, 4)
        threshold_MLEs = np.full((n_movern,),threshold)
        ax.plot(all_moverns,threshold_MLEs,c='r',zorder=3, ls="--",lw=1)

        # add shaded background over range of m/n values
        ax.axvspan(min_movern,max_movern, alpha=0.1, color='red',zorder=0.2)

        # add arrow at best fit m/n
        # get the limits for the arrow
        max_MLE = max(Medians)
        min_MLE = min(FirstQs)
        dy = (max_MLE-min_MLE)/8
        spacing = 1.5
        # add arrow at best fit m/n
        ax.add_patch(
            patches.Arrow(
                best_fit_moverns[basin_key], #x
                max_MLE-(dy*spacing), #y
                0, #dx
                dy, #dy
                width = 0.05,
                facecolor = 'k',
                edgecolor = 'k')
            )
        ax.text(best_fit_moverns[basin_key]-0.075, max_MLE-1.5*(dy*spacing), "Best-fit "+r"$\theta$",fontsize=8)

        # set the axes labels
        ax.set_xlabel(r'$\theta$')
        ax.set_ylabel('MLE')
        #ax.set_title('Basin '+str(basin_key))
        ax.set_xlim(start_movern,end_movern)
        ax.set_ylim(min(FirstQs),max(ThirdQs)+0.0005)

        # save the figure
        ImageName = points_directory+fname_prefix+'_MC_points' +str(basin_key)+'.'+FigFormat
        plt.savefig(ImageName, format=FigFormat, dpi=300)
        ax.cla()

    fig.clf()
    plt.close(fig)

#=============================================================================
# SENSITIVITY FUNCTIONS
# Functions that make plots of sensitivity tests on the m/n analysis
#=============================================================================
def PlotSensitivityResultsSigma(DataDirectory,fname_prefix, FigFormat = "png", size_format = "ESURF", movern_method = "all"):
    """
    This function makes a plot of the results of a sensitivity analysis on sigma in the MLE method
    of calculating m/n.
    You need to specify the base directory - will look in every folder in this base directory
    with the prefix "Chi_analysis_sigma_" and get the m/n analysis in each sub-directory.
    NOTE - not set up for parallel (FJC 19/10/17)

    Args:
        DataDirectory (str): the root data directory
        fname_prefix (str): the DEM name without extension
        FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
        size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
        movern_method (str): the movern method you want to test. Can be "all" or "points"

    Author: FJC
    """
    import os

    # Try doing this as a dataframe and masking?!?!
    columns = ["sigma", "basin_key", "m_over_n"]
    combined_DF = pd.DataFrame(columns=columns)

    # loop through each sub-directory with the sensitivity results
    MLE_str = "sigma_"
    for subdir, dirs, files in os.walk(DataDirectory):
        for dir in dirs:
            if MLE_str in dir:
                this_dir = DataDirectory+"/"+dir+'/'
                print (this_dir)

                this_df = pd.DataFrame(columns=columns)

                # get this value of sigma
                this_sigma = (dir.split("_"))[-1]

                # get the best fit m/n dataframe
                if movern_method == 'all':
                    BasinDF = Helper.ReadBasinStatsCSV(this_dir,fname_prefix)
                elif movern_method == 'points':
                    BasinDF = Helper.ReadBasinStatsPointCSV(this_dir,fname_prefix)
                else:
                    print ("You didn't specify a correct m/n method, you need to specify either 'all' or 'points'")
                MOverNDict = SimpleMaxMLECheck(BasinDF)
                this_df['sigma'] = [int(this_sigma)] * len(MOverNDict)
                this_df['basin_key'] = MOverNDict.keys()
                this_df['m_over_n'] = MOverNDict.values()
                combined_DF = combined_DF.append(this_df, ignore_index=True)

    # Set up fonts for plots
    label_size = 10
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Liberation Sans']
    rcParams['font.size'] = label_size

    # make a figure
    fig = makefigure(size_format)

    gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
    ax = fig.add_subplot(gs[5:100,10:95])
    #ax.grid(zorder=-100)

    # now get the basin keys
    basin_keys = combined_DF['basin_key'].unique()
    basin_keys = [int(x) for x in basin_keys]

    keys = []
    sigmas = []
    # loop through the basin keys and plot sigma and m/n for each basin
    for key in basin_keys:
        this_df = combined_DF.loc[combined_DF['basin_key'] == key]
        #sort the data for plotting
        this_df.sort_values('sigma',inplace=True)
        this_df = this_df.loc[combined_DF['m_over_n'] > 0.2]
        if not this_df.empty:
            keys.append(int(key))
            sigmas.append(this_df['sigma'].iloc[0])

    ax.scatter(keys, sigmas, c='0.75', edgecolor='k')
    #
    # set the axes labels
    ax.set_xlabel('Basin key')
    ax.set_ylabel('$\sigma$ where $m/n$ is invariant')
    ax.set_xticks(basin_keys)

    # Save the figure
    ImageName = DataDirectory+fname_prefix+'_sensitivity.'+FigFormat
    plt.savefig(ImageName, format=FigFormat, dpi=300)
    fig.clf()

# def PlotSensitivityResultsMLEWithSigma(DataDirectory,fname_prefix, FigFormat = "png", size_format = "ESURF"):
#     """
#     This function makes a plot of the results of a sensitivity analysis on sigma in the MLE method
#     of calculating m/n.
#     You need to specify the base directory - will look in every folder in this base directory
#     with the prefix "sigma_" and get the m/n analysis in each sub-directory.
#     Makes a plot of MLE with increasing sigma to show where it becomes invariant
#
#     Args:
#         DataDirectory (str): the root data directory
#         fname_prefix (str): the DEM name without extension
#         FigFormat (str): The format of the figure. Usually 'png' or 'pdf'. If "show" then it calls the matplotlib show() command.
#         size_format (str): Can be "big" (16 inches wide), "geomorphology" (6.25 inches wide), or "ESURF" (4.92 inches wide) (defualt esurf).
#
#     Author: FJC
#     """
#     import os
#
#     # Try doing this as a dataframe and masking?!?!
#     columns = ["sigma", "basin_key", "m_over_n"]
#     combined_DF = pd.DataFrame(columns=columns)
#
#     # loop through each sub-directory with the sensitivity results
#     MLE_str = "sigma_"
#     for subdir, dirs, files in os.walk(DataDirectory):
#         for dir in dirs:
#             if MLE_str in dir:
#                 this_dir = DataDirectory+"/"+dir+'/'
#                 print this_dir
#
#                 this_df = pd.DataFrame(columns=columns)
#
#                 # get this value of sigma
#                 this_sigma = (dir.split("_"))[-1]
#
#                 # get the best fit m/n dataframe
#                 BasinDF = Helper.ReadBasinStatsCSV(this_dir,fname_prefix)
#                 MOverNDict = SimpleMaxMLECheck(BasinDF)
#                 this_df['sigma'] = [int(this_sigma)] * len(MOverNDict)
#                 this_df['basin_key'] = MOverNDict.keys()
#                 this_df['m_over_n'] = MOverNDict.values()
#                 combined_DF = combined_DF.append(this_df, ignore_index=True)
#
#     # Set up fonts for plots
#     label_size = 10
#     rcParams['font.family'] = 'sans-serif'
#     rcParams['font.sans-serif'] = ['Liberation Sans']
#     rcParams['font.size'] = label_size
#
#     # make a figure
#     if size_format == "geomorphology":
#         fig = plt.figure(1, facecolor='white',figsize=(6.25,3.5))
#         #l_pad = -40
#     elif size_format == "big":
#         fig = plt.figure(1, facecolor='white',figsize=(16,9))
#         #l_pad = -50
#     else:
#         fig = plt.figure(1, facecolor='white',figsize=(4.92126,3.2))
#         #l_pad = -35
#
#     gs = plt.GridSpec(100,100,bottom=0.15,left=0.1,right=0.85,top=0.9)
#     ax = fig.add_subplot(gs[5:100,10:95])
#     #ax.grid(zorder=-100)
#
#     # now get the basin keys
#     basin_keys = combined_DF['basin_key'].unique()
#     basin_keys = [int(x) for x in basin_keys]
#
#     keys = []
#     sigmas = []
#     MLEs = []
#     # loop through the basin keys and plot sigma and m/n for each basin
#     for key in basin_keys:
#         this_df = combined_DF.loc[combined_DF['basin_key'] == key]
#         #sort the data for plotting
#         this_df.sort_values('sigma',inplace=True)
#         this_df = this_df.loc[combined_DF['m_over_n'] > 0.2]
#         if not this_df.empty:
#             keys.append(int(key))
#             sigmas.append(this_df['sigma'].iloc[0])
#
#     ax.scatter(keys, sigmas, c='0.75', edgecolor='k')
#     #
#     # set the axes labels
#     ax.set_xlabel('Basin key')
#     ax.set_ylabel('$\sigma$ where $m/n$ is invariant')
#     ax.set_xticks(basin_keys)
#
#     # Save the figure
#     ImageName = DataDirectory+fname_prefix+'_sensitivity.'+FigFormat
#     plt.savefig(ImageName, format=FigFormat, dpi=300)
#     fig.clf()