# -*- coding: utf-8 -*- # # FRETBursts - A single-molecule FRET burst analysis toolkit. # # Copyright (C) 2013-2016 The Regents of the University of California, # Antonino Ingargiola <tritemio@gmail.com> # """ The module `burtlib_ext.py` (by default imported as `bext`) contains extensions to `burstslib.py`. It can be though as a simple plugin system for FRETBursts. Functions here defined operate on :class:`fretbursts.burstlib.Data()` objects extending the functionality beyond the core functions and methods defined in `burstlib`. This modularization allows to implement new functions without overloading the :class:`fretbursts.burstlib.Data` with an high number of non-core methods. The type of functions here implemented are quite diverse. A short summary follows. * :func:`burst_search_and_gate` performs the AND-gate burst search taking intersection of the bursts detected in two photons streams. * :func:`burst_data` returns a pandas DataFrame with burst data (one burst per row). Burst data includes sizes, duration, E, S, etc.... * :func:`bursts_fitter` and :func:`fit_bursts_kde_peak` help to build and fit histograms and KDEs for E or S. * :func:`calc_mdelays_hist` computes the histogram of the m-delays distribution of photon intervals. * :func:`moving_window_chunks`: slices the measurement using a moving-window (along the time axis). Used to follow or detect kinetics. * :func:`join_data` joins different measurements to create a single "virtual" measurement from a series of measurements. Finally a few functions deal with burst timestamps: * :func:`get_burst_photons` returns a list of timestamps for each burst. * :func:`ph_burst_stats` compute any statistics (for example mean or median) on the timestamps of each burst. * :func:`asymmetry` returns a burst "asymmetry index" based on the difference between Donor and Acceptor timestamps. """ from __future__ import division, print_function, absolute_import from builtins import range, zip import numpy as np from scipy.stats import erlang from scipy.optimize import leastsq import pandas as pd import tables from .ph_sel import Ph_sel from . import background as bg from .utils.misc import pprint, HistData, _is_list_of_arrays from . import burstlib from . import select_bursts from . import fret_fit from . import mfit from .burstlib import isarray, Data from .phtools.burstsearch import Bursts def moving_window_startstop(start, stop, step, window=None): """Computes list of (start, stop) values defining a moving-window. Arguments: start, stop (scalars): range spanned by the moving window. step (scalar): window shift at each "step". window (scalar): window duration. If None, window = step. Returns: A list of (start, stop) values for the defined moving-window range. """ if window is None: window = step # Reduce `stop` to an integer number of `step` and adjust for last window stop_corrected = (stop - stop % step) - window + step # differently from range, np.arange accepts also floats return [(t1, t1 + window) for t1 in np.arange(start, stop_corrected, step)] def moving_window_dataframe(start, stop, step, window=None, time_zero=0): """Create a DataFrame for storing moving-window data. Create and return a DataFrame for storing columns of moving-window data. Three columns are initialize with "time axis" data: 'tstart', 'tstop' and 'tmean'. The returned DataFrame is typically used to store (in new columns) quantities as function of the moving time-window. Examples of such quantities are number of bursts, mean burst size/duration, fitted E peak position, etc. Arguments: start, stop (scalars): range spanned by the moving window. step (scalar): window shift at each "step". window (scalar): window duration. If None, window = step. Returns: DataFrame with 3 columns (tstart, tstop, tmean), one row for each window position. See also: :func:`moving_window_chunks`. """ mw_slices = np.array(moving_window_startstop(start, stop, step, window)) tstart = mw_slices[:, 0] tstop = mw_slices[:, 1] tmean = 0.5 * (tstart + tstop) df = pd.DataFrame(data=dict(tstart=tstart, tstop=tstop, tmean=tmean)) return df - time_zero def moving_window_chunks(dx, start, stop, step, window=None, time_zero=0): """Return a list of Data object, each containing bursts in one time-window. Each returned Data object contains only bursts lying in the current time-window. Additionally, the start/stop values of current time-window are saved in `Data`'s attributes: name, slice_tstart and slice_tstop. Arguments: dx (Data): the Data() object to be sliced with a moving window. start, stop (scalars): time-range in seconds spanned by the moving window. step (scalar): window time-shift at each step. window (scalar): window duration. If None, window = step. time_zero (scalar): shift the start/stop times saved in the Data attributes so that "time zero" falls at `time_zero` seconds. Default 0, no shift. Returns: A list of Data objects, one for each window position. See also: :func:`moving_window_dataframe`. """ time_slices = moving_window_startstop(start, stop, step, window) dx_slices = [] for t1, t2 in time_slices: dx_slice = dx.select_bursts(select_bursts.time, time_s1=t1, time_s2=t2) dx_slice.name = 'Slice %d:%d s' % (t1 - time_zero, t2 - time_zero) dx_slice.add(slice_tstart=t1 - time_zero, slice_tstop=t2 - time_zero) dx_slices.append(dx_slice) return dx_slices def calc_mean_lifetime(dx, t1=0, t2=np.inf, ph_sel=Ph_sel('all')): """Compute the mean lifetime in each burst. Arguments: t1, t2 (floats): min and max value (in TCSPC bin units) for the nanotime to be included in the mean ph_sel (Ph_sel object): object defining the photon selection. See :mod:`fretbursts.ph_sel` for details. Returns: List of arrays of per-burst mean lifetime. One array per channel. """ mean_lifetimes = [] for bursts, nanot, mask in zip(dx.mburst, dx.nanotimes, dx.iter_ph_masks(ph_sel)): selection = (nanot > t1) * (nanot < t2) # Select photons in ph_sel AND with nanotime in [t1, t2] if isarray(mask): selection *= mask mean_lifetimes.append( burstlib.burst_ph_stats(nanot, bursts, mask=selection, func=np.mean) - t1) return mean_lifetimes def _store_bg_data(store, base_name, min_ph_delays_us, best_bg, best_th, BG_data, BG_data_e): if not base_name.endswith('/'): base_name = base_name + '/' store_name = store.filename group_name = '/' + base_name[:-1] store.create_carray(group_name, 'min_ph_delays_us', obj=min_ph_delays_us, createparents=True) for ph_sel, values in BG_data.items(): store.create_carray(group_name, str(ph_sel), obj=values) for ph_sel, values in BG_data_e.items(): store.create_carray(group_name, str(ph_sel) + '_err', obj=values) store.close() store = pd.HDFStore(store_name) store[base_name + 'best_bg'] = best_bg store[base_name + 'best_th'] = best_th store.close() def _load_bg_data(store, base_name, ph_streams): if not base_name.endswith('/'): base_name = base_name + '/' store_name = store.filename group_name = '/' + base_name[:-1] min_ph_delays = store.get_node(group_name, 'min_ph_delays_us')[:] BG_data = {} for ph_sel in ph_streams: BG_data[ph_sel] = store.get_node(group_name, str(ph_sel))[:] BG_data_e = {} for ph_sel in ph_streams: BG_data_e[ph_sel] = store.get_node(group_name, str(ph_sel) + '_err')[:] store.close() store = pd.HDFStore(store_name) best_bg = store[base_name + 'best_bg'] best_th = store[base_name + 'best_th'] store.close() return best_th, best_bg, BG_data, BG_data_e, min_ph_delays def calc_bg_brute_cache(dx, min_ph_delay_list=None, return_all=False, error_metrics='KS', force_recompute=False): """Compute background for all the ch, ph_sel and periods caching results. This function performs a brute-force search of the min ph delay threshold. The best threshold is the one the minimizes the error function. The best background fit is the rate fitted using the best threshold. Results are cached to disk and loaded transparently when needed. The cache file is an HDF5 file named `dx.fname[:-5] + '_BKG.hdf5'`. Arguments: min_ph_delay_list (sequence): sequence of values used for the brute-force search. Background and error will be computed for each value in `min_ph_delay_list`. return_all (bool): if True return all the fitted backgrounds and error functions. Default False. error_metrics (string): Specifies the error metric to use. See :func:`fretbursts.background.exp_fit` for more details. force_recompute (bool): if True, recompute results even if a cache is found. Returns: Two arrays with best threshold (us) and best background. If `return_all = True` also returns the dictionaries containing all the fitted backgrounds and errors. """ if min_ph_delay_list is None: min_ph_delay_list = np.arange(100, 8500, 100) else: min_ph_delay_list = np.asfarray(min_ph_delay_list) base_name = 'bg_%s_%ds_%s/' % (dx.bg_fun_name, dx.bg_time_s, error_metrics) store_fname = dx.fname[:-5] + '_BKG.hdf5' comp_filter = tables.Filters(complevel=6, complib='zlib') store = tables.open_file(store_fname, mode='a', filters=comp_filter) loaded = False if '/' + base_name + 'min_ph_delays_us' in store: Th = store.get_node('/', base_name + 'min_ph_delays_us').read() if np.all(Th == min_ph_delay_list) and not force_recompute: print(' - Loading BG from cache') res = _load_bg_data(store, base_name, dx.ph_streams) loaded = True if not loaded: print(' - Computing BG') res = calc_bg_brute(dx, min_ph_delay_list=min_ph_delay_list, return_all=True, error_metrics=error_metrics) best_th, best_bg, BG_data, BG_data_e, min_ph_delays = res _store_bg_data(store, base_name, min_ph_delays, best_bg, best_th, BG_data, BG_data_e) if return_all: return res else: return res[:-3] def calc_bg_brute(dx, min_ph_delay_list=None, return_all=False, error_metrics='KS'): """Compute background for all the ch, ph_sel and periods. This function performs a brute-force search of the min ph delay threshold. The best threshold is the one the minimizes the error function. The best background fit is the rate fitted using the best threshold. Arguments: min_ph_delay_list (sequence): sequence of values used for the brute-force search. Background and error will be computed for each value in `min_ph_delay_list`. return_all (bool): if True return all the fitted backgrounds and error functions. Default False. error_metrics (string): Specifies the error metric to use. See :func:`fretbursts.background.exp_fit` for more details. Returns: Two arrays with best threshold (us) and best background. If `return_all = True` also returns the dictionaries containing all the fitted backgrounds and errors. """ if min_ph_delay_list is None: min_ph_delay_list = np.arange(100, 8500, 100) else: min_ph_delay_list = np.asfarray(min_ph_delay_list) ph_sel_labels = [str(p) for p in dx.ph_streams] BG_data, BG_data_e = {}, {} index = pd.MultiIndex.from_product([list(range(dx.nch)), ph_sel_labels], names=['CH', 'ph_sel']) best_th = pd.DataFrame(index=index, columns=np.arange(dx.nperiods)) best_th.columns.name = 'period' best_th.sortlevel(inplace=True) best_bg = best_th.copy() for ph_sel in dx.ph_streams: # Compute BG and error for all ch, periods and thresholds # Shape: (nch, nperiods, len(thresholds)) BG_data[ph_sel], BG_data_e[ph_sel] = bg.fit_varying_min_delta_ph( dx, min_ph_delay_list, bg_fit_fun=bg.exp_fit, ph_sel=ph_sel, error_metrics=error_metrics) # Compute the best Th and BG estimate for all ch and periods for ich in range(dx.nch): for period in range(dx.nperiods): b = BG_data_e[ph_sel][ich, period, :] i_best_th = b[-np.isnan(b)].argmin() best_th.loc[(ich, str(ph_sel)), period] = \ min_ph_delay_list[i_best_th] best_bg.loc[(ich, str(ph_sel)), period] = \ BG_data[ph_sel][ich, period, i_best_th] if return_all: return best_th, best_bg, BG_data, BG_data_e, min_ph_delay_list else: return best_th, best_bg def burst_data(dx, include_bg=False, include_ph_index=False, skip_ch=None): """Return a table (`pd.DataFrame`) of burst data (one row per burst). Columns include: - *E* and *S*: FRET and stoichiometry for each burst. - *nd*, *na*, *naa*, *nda*: burst counts in DexDem, DexAem, AexAem and AexDem photon streams. - *t_start*, *t_stop*: time (in seconds) of first and last photon inside the burst - *width_ms*: burst duration in milliseconds - *size_raw*: the total uncorrected burst counts in the photon stream used for burst search Optional columns include: - *i_start*, *i_stop*: index of burst start and stop relative to the original timestamps array (requires `include_ph_index=True`) - *bg_dd*, *bg_ad*, *bg_aa*, *bg_da*: background contribution in the DexDem, DexAem, AexAem, AexDem photon streams (requires `include_bg=True`). If the peak photon-counts in each bursts has been computed (see :meth:`fretbursts.burstlib.Data.calc_max_rate`), it will be included as a column called *max_rate*. Arguments: include_bg (bool): if True includes additional columns for burst background (see above). Default False. include_ph_index (bool): if True includes additional two columns for index of first and last timestamp in each burst. Default False. skip_ch (list or None): List of channels to skip if measurement is multispot. Return: A pandas's DataFrame containing burst data (one row per burst). """ def _burst_data_ich(dx, ich, include_bg=False, include_ph_index=False): bursts_ich = dx.burst_data_ich(ich=ich) if not include_ph_index: bursts_ich.pop('i_start') bursts_ich.pop('i_stop') if not include_bg: bursts_ich = {k: v for k, v in bursts_ich.items() if not k.startswith('bg')} return pd.DataFrame.from_dict(bursts_ich) if skip_ch is None: skip_ch = [] kws = dict(include_bg=include_bg, include_ph_index=include_ph_index) bursts = {} for ich in range(dx.nch): if ich not in skip_ch and dx.num_bursts[ich] > 0: bursts[ich] = _burst_data_ich(dx, ich=ich, **kws) bursts[ich]['spot'] = ich if dx.nch > 1: bursts = pd.concat(bursts, ignore_index=True) else: bursts = bursts[0] return bursts def fit_bursts_kde_peak(dx, burst_data='E', bandwidth=0.03, weights=None, gamma=1, add_naa=False, x_range=(-0.1, 1.1), x_ax=None, save_fitter=True): """Fit burst data (typ. E or S) by finding the KDE max on all the channels. Arguments: dx (Data): `Data` object containing the FRET data burst_data (string): name of burst-data attribute (i.e 'E' or 'S'). bandwidth (float): bandwidth for the Kernel Density Estimation weights (string or None): kind of burst-size weights. See :func:`fretbursts.fret_fit.get_weights`. gamma (float): gamma factor passed to `get_weights()`. add_naa (bool): if True adds `naa` to the burst size. save_fitter (bool): if True save the `MultiFitter` object in the `dx` object with name: burst_data + '_fitter'. x_range (tuple of floats): min-max range where to search for the peak. Used to select a single peak in a multi-peaks distribution. x_ax (array or None): x-axis used to evaluate the Kernel Density Returns: An array of max peak positions (one per ch). If the number of channels is 1 returns a scalar. """ if x_ax is None: x_ax = np.arange(-0.2, 1.2, 0.0002) fitter = bursts_fitter(dx, burst_data=burst_data, save_fitter=save_fitter, weights=weights, gamma=gamma, add_naa=add_naa) fitter.calc_kde(bandwidth=bandwidth) fitter.find_kde_max(x_ax, xmin=x_range[0], xmax=x_range[1]) KDE_max_mch = fitter.kde_max_pos if dx.nch == 1: KDE_max_mch = KDE_max_mch[0] return KDE_max_mch def bursts_fitter(dx, burst_data='E', save_fitter=True, weights=None, gamma=1, add_naa=False, skip_ch=None, binwidth=None, bandwidth=None, model=None, verbose=False): """Create a mfit.MultiFitter object (for E or S) add it to `dx`. A MultiFitter object allows to fit multi-channel data with the same model. Arguments: dx (Data): `Data` object containing the FRET data save_fitter (bool): if True save the `MultiFitter` object in the `dx` object with name: burst_data + '_fitter'. burst_data (string): name of burst-data attribute (i.e 'E' or 'S'). weights (string or None): kind of burst-size weights. See :func:`fretbursts.fret_fit.get_weights`. gamma (float): gamma factor passed to `get_weights()`. add_naa (bool): if True adds `naa` to the burst size. binwidth (float or None): bin width used to compute the histogram. If None the histogram is not computed. bandwidth (float or None): bandwidth used to compute the KDE If None the KDE is not computed. model (lmfit.Model object or None): lmfit Model used for histogram fitting. If None the histogram is not fitted. verbose (bool): if False avoids printing any output. Returns: The `mfit.MultiFitter` object with the specified burst-size weights. """ assert burst_data in dx fitter = mfit.MultiFitter(dx[burst_data], skip_ch=skip_ch) if weights is None or _is_list_of_arrays(weights): # If no weights or precomputed weights fitter.weights = weights else: # Otherwise build the kwargs to compute weights through get_weights weight_kwargs = dict(weights=weights, gamma=gamma, nd=dx.nd, na=dx.na) if add_naa: weight_kwargs['naa'] = dx.naa if weights == 'brightness': weight_kwargs['widths'] = [mb.width*dx.clk_p for mb in dx.mburst] fitter.set_weights_func(weight_func=fret_fit.get_weights, weight_kwargs=weight_kwargs) if bandwidth is not None: fitter.calc_kde(bandwidth) if binwidth is not None: fitter.histogram(binwidth=binwidth) if model is not None: fitter.fit_histogram(model=model, verbose=verbose) if save_fitter: # Save fitter variables as normal attributes (not with Data.add) # so they are not copied when copying Data (e.g. burst selection) setattr(dx, burst_data + '_fitter', fitter) setattr(dx, burst_data + '_weights', (weights, float(gamma), add_naa)) return fitter def _get_bg_distrib_erlang(d, ich=0, m=10, ph_sel=Ph_sel('all'), period=(0, -1)): """Return a frozen (scipy) erlang distrib. with rate equal to the bg rate. """ assert ph_sel in [Ph_sel('all'), Ph_sel(Dex='Dem'), Ph_sel(Dex='Aem')] # Compute the BG distribution if ph_sel == Ph_sel('all'): bg_ph = d.bg_dd[ich] + d.bg_ad[ich] elif ph_sel == Ph_sel(Dex='Dem'): bg_ph = d.bg_dd[ich] elif ph_sel == Ph_sel(Dex='Aem'): bg_ph = d.bg_ad[ich] rate_ch_kcps = bg_ph[period[0]:period[1]+1].mean()/1e3 # bg rate in kcps bg_dist = erlang(a=m, scale=1./rate_ch_kcps) return bg_dist def _get_bg_erlang(d, ich=0, m=10, ph_sel=Ph_sel('all'), period=0): """Return a frozen (scipy) erlang distrib. with rate equal to the bg rate. """ bg_rate = d.bg_from(ph_sel=ph_sel)[ich][period] #bg_rate_kcps = bg_rate*1e-3 bg_dist = erlang(a=m, scale=1./bg_rate) return bg_dist def histogram_mdelays(d, ich=0, m=10, ph_sel=Ph_sel('all'), binwidth=1e-3, dt_max=10e-3, bins=None, inbursts=False): """Compute histogram of m-photons delays (or waiting times). Arguments: dx (Data object): contains the burst data to process. ich (int): the channel number. Default 0. m (int): number of photons used to compute each delay. ph_sel (Ph_sel object): photon selection to use. inbursts (bool): if True, compute the histogram with only photons in bursts. Returns: A `HistData` object containing the computed histogram. """ if bins is None: bins = np.arange(0, dt_max, binwidth) if not inbursts: ph = d.get_ph_times(ich=ich, ph_sel=ph_sel) else: ph = d.ph_in_bursts_ich(ich=ich, ph_sel=ph_sel) ph_mdelays = np.diff(ph[::m])*d.clk_p hist = HistData(*np.histogram(ph_mdelays, bins=bins)) return hist def calc_mdelays_hist(d, ich=0, m=10, period=(0, -1), bins_s=(0, 10, 0.02), ph_sel=Ph_sel('all'), bursts=False, bg_fit=True, bg_F=0.8): """Compute histogram of m-photons delays (or waiting times). Arguments: dx (Data object): contains the burst data to process. ich (int): the channel number. Default 0. m (int): number of photons used to compute each delay. period (int or 2-element tuple): index of the period to use. If tuple, the period range between period[0] and period[1] (included) is used. bins_s (3-element tuple): start, stop and step for the bins ph_sel (Ph_sel object): photon selection to use. Returns: Tuple of values: * bin_x (array): array of bins centers * histograms_y (array): arrays of histograms, contains 1 or 2 histograms (when `bursts` is False or True) * bg_dist (random distribution): erlang distribution with same rate as background (kcps) * a, rate_kcps (floats, optional): amplitude and rate for an Erlang distribution fitted to the histogram for bin_x > bg_mean*bg_F. Returned only if `bg_fit` is True. """ assert ph_sel in [Ph_sel('all'), Ph_sel(Dex='Dem'), Ph_sel(Dex='Aem')] if np.size(period) == 1: period = (period, period) periods = slice(d.Lim[ich][period[0]][0], d.Lim[ich][period[1]][1] + 1) bins = np.arange(*bins_s) if ph_sel == Ph_sel('all'): ph = d.ph_times_m[ich][periods] if bursts: phb = ph[d.ph_in_burst[ich][periods]] elif ph_sel == Ph_sel(Dex='Dem'): donor_ph_period = -d.A_em[ich][periods] ph = d.ph_times_m[ich][periods][donor_ph_period] if bursts: phb = ph[d.ph_in_burst[ich][periods][donor_ph_period]] elif ph_sel == Ph_sel(Dex='Aem'): accept_ph_period = d.A_em[ich][periods] ph = d.ph_times_m[ich][periods][accept_ph_period] if bursts: phb = ph[d.ph_in_burst[ich][periods][accept_ph_period]] ph_mdelays = np.diff(ph[::m])*d.clk_p*1e3 # millisec if bursts: phb_mdelays = np.diff(phb[::m])*d.clk_p*1e3 # millisec phb_mdelays = phb_mdelays[phb_mdelays < 5] # Compute the PDF through histograming hist_kwargs = dict(bins=bins, normed=True) mdelays_hist_y, _ = np.histogram(ph_mdelays, **hist_kwargs) bin_x = bins[:-1] + 0.5*(bins[1] - bins[0]) if bursts: mdelays_b_hist_y, _ = np.histogram(phb_mdelays, **hist_kwargs) mdelays_b_hist_y *= phb_mdelays.size/ph_mdelays.size if bursts: histograms_y = np.vstack([mdelays_hist_y, mdelays_b_hist_y]) else: histograms_y = mdelays_hist_y results = [bin_x, histograms_y] # Compute the BG distribution bg_dist = _get_bg_distrib_erlang(d, ich=ich, m=m, period=period, ph_sel=ph_sel) bg_mean = bg_dist.mean() results.append(bg_dist) if bg_fit: ## Fitting the BG portion of the PDF to an Erlang _x = bin_x[bin_x > bg_mean*bg_F] _y = mdelays_hist_y[bin_x > bg_mean*bg_F] fit_func = lambda x, a, rate_kcps: a*erlang.pdf(x, a=m, scale=1./rate_kcps) err_func = lambda p, x, y: fit_func(x, p[0], p[1]) - y p, flag = leastsq(err_func, x0=[0.9, 3.], args=(_x, _y)) print(p, flag) a, rate_kcps = p results.extend([a, rate_kcps]) return results def burst_data_period_mean(dx, burst_data): """Compute mean `burst_data` in each period. Arguments: dx (Data object): contains the burst data to process burst_data (list of arrays): one array per channel, each array has one element of "burst data" per burst. Returns: 2D of arrays with shape (nch, nperiods). Example: burst_period_mean(dx, dx.nt) """ mean_burst_data = np.zeros((dx.nch, dx.nperiods)) for ich, (b_data_ch, period) in enumerate(zip(burst_data, dx.bp)): for iperiod in range(dx.nperiods): mean_burst_data[ich, iperiod] = b_data_ch[period == iperiod].mean() return mean_burst_data def join_data(d_list, gap=0): """Joins burst data of different measurements in a single `Data` object. Merge a list of `Data` objects (i.e. a set of different measurements) into a single `Data` object containing all the bursts (like it was a single acquisition). The `Data` objects to be merged need to already contain burst data. The input `Data` objects are required to have undergone background estimation (all with the same background period) and burst search. For each measurement, the time of burst start is offset by the duration of the previous measurement + an additional `gap` (which is 0 by default). The index of the first/last photon in the burst (`istart` and `iend`) are kept unmodified and refer to the original timestamp array. The timestamp arrays are not copied: the new `Data` object will not contain any timestamp arrays (ph_times_m). This may cause errors when calling functions that require the timestamps data such as burst search. The background arrays (bg, bg_dd, etc...) are concatenated. The burst attribute `bp` is updated to refer to these new concatenated arrays. The attributes `Lim` and `Ph_p` are concatenated and left unchanged. Therefore different sections will refer to different original timestamp arrays. The retuned `Data` object will have a new attribute `i_origin`, containing, for each burst, the index of the original data object in the list. Arguments: d_list (list of Data objects): the list of measurements to concatenate. gap (float): the time delay (or gap) in seconds to add to each concatenated measurement. Returns: A `Data` object containing bursts from the all the objects in `d_list`. This object will not contain timestamps, therefore it is possible to perform burst selections but not a new burst serach. Example: If `d1` and `d2` are two measurements to concatenate:: file_list = ['filename1', 'filename2'] d_list = [loader.photon_hdf5(f) for f in file_list] for dx in d_list: loader.alex_apply_period(dx) dx.calc_bg(bg.exp_fit, time_s=30, tail_min_us='auto', F_bg=1.7) dx.burst_search() d_merged = bext.join_data(d_list) `d_merged` will contain bursts from both input files. """ from itertools import islice nch = d_list[0].nch bg_time_s = d_list[0].bg_time_s for d in d_list: assert d.nch == nch assert d.bg_time_s == bg_time_s new_d = Data(**d_list[0]) new_d.delete('ph_times_m') # Set the bursts fields by concatenation along axis = 0 for name in Data.burst_fields: if name in new_d: empty = Bursts.empty() if name is 'mburst' else np.array([]) new_d.add(**{name: [empty]*nch}) concatenate = Bursts.merge if name == 'mburst' else np.concatenate for ich in range(nch): new_size = np.sum((d.mburst[ich].num_bursts for d in d_list)) if new_size == 0: continue # -> No bursts in this ch value = concatenate([d[name][ich] for d in d_list]) new_d[name][ich] = value assert new_d[name][ich].size == new_size # Set the background fields by concatenation along axis = 0 new_nperiods = np.sum((d.nperiods for d in d_list)) for name in ('Lim', 'Ph_p'): if name in new_d: new_d.add(**{name: []}) for ich in range(nch): value = np.concatenate([d[name][ich] for d in d_list]) new_d[name].append(value) assert new_d[name][ich].shape[0] == new_nperiods if 'bg' in new_d: new_d.add(bg={}) for sel in d.bg: new_d.bg[sel] = [] for ich in range(nch): value = np.concatenate([d.bg[sel][ich] for d in d_list]) new_d.bg[sel].append(value) assert new_d.bg[sel][ich].shape[0] == new_nperiods # Set the i_origin burst attribute new_d.add(i_origin=[]) for ich in range(nch): i_origin_ch = np.concatenate([i_d * np.ones(d.num_bursts[ich]) for i_d, d in enumerate(d_list)]) new_d.i_origin.append(i_origin_ch) # Update the `bp` attribute to refer to the background period in # the new concatenated background arrays. sum_nperiods = np.cumsum([d.nperiods for d in d_list]) for i_d, d in islice(enumerate(d_list), 1, None): for ich in range(nch): # Burst "slice" in new_d coming from current d b_mask = new_d.i_origin[ich] == i_d # Add the nperiods of all the previous measurements new_d.bp[ich][b_mask] = new_d.bp[ich][b_mask] + sum_nperiods[i_d-1] # Modify the new mburst so the time of burst start/end is monotonic offset_clk = 0 iburst_start = d_list[0].num_bursts.copy() for d_i in d_list[1:]: offset_clk += int((d_i.time_max + gap) / d_i.clk_p) for ich in range(nch): if new_d.mburst[ich].num_bursts == 0: continue mask = slice(iburst_start[ich], iburst_start[ich] + d_i.num_bursts[ich]) bursts_ch_i = new_d.mburst[ich][mask] bursts_ch_i.start += offset_clk bursts_ch_i.stop += offset_clk iburst_start += d_i.num_bursts return new_d def burst_search_and_gate(dx, F=6, m=10, min_rate_cps=None, c=-1, ph_sel1=Ph_sel(Dex='DAem'), ph_sel2=Ph_sel(Aex='Aem'), compact=False, mute=False): """Return a Data object containing bursts obtained by and-gate burst-search. The and-gate burst search is a composition of 2 burst searches performed on different photon selections. The bursts in the and-gate burst search are the overlapping bursts in the 2 initial burst searches, and their duration is the intersection of the two overlapping bursts. By default the 2 photon selections are D+A photons during D excitation (`Ph_sel(Dex='DAem')`) and A photons during A excitation (`Ph_sel(Aex='Aex')`). Arguments: dx (Data object): contains the data on which to perform the burst search. Background estimation must be performed before the search. F (float or tuple): burst search parameter F. If it is a 2-element tuple, specifies F separately for `ph_sel1` and `ph_sel2`. m (int or tuple): Burst search parameter m. If it is a 2-element tuple, specifies m separately for `ph_sel1` and `ph_sel2`. min_rate_cps (float or tuple): min. rate in cps for burst detection. If not None, min_rate_cps overrides any value passed in `F`. If a 2-element tuple specifies min_rate_cps separately for `ph_sel1` and `ph_sel2`. In multispot data, it can also be an array (or a 2-tuple or arrays) with size equal to the number of spots. c (float): parameter used set the definition of the rate estimatator. See `c` parameter in :meth:`.burstlib.Data.burst_search` for details. ph_sel1 (Ph_sel object): photon selections used for bursts search 1. ph_sel2 (Ph_sel object): photon selections used for bursts search 2. mute (bool): if True nothing is printed. Default: False. Return: A new `Data` object containing bursts from the and-gate search. See also :meth:`fretbursts.burstlib.Data.burst_search`. """ dx_d = dx.copy(mute=mute) dx_a = dx.copy(mute=mute) dx_and = dx.copy(mute=mute) dx_and.delete_burst_data() def _get_args(x): try: assert len(x) == 2 x1, x2 = x except TypeError: x1 = x2 = x return x1, x2 m1, m2 = _get_args(m) F1, F2 = _get_args(F) min_rate_cps1, min_rate_cps2 = _get_args(min_rate_cps) dx_d.burst_search(L=m1, m=m1, F=F1, min_rate_cps=min_rate_cps1, c=c, ph_sel=ph_sel1, compact=compact, computefret=False, mute=mute) dx_a.burst_search(L=m2, m=m2, F=F2, min_rate_cps=min_rate_cps2, c=c, ph_sel=ph_sel2, compact=compact, computefret=False, mute=mute) mburst_and = [] for bursts_d, bursts_a in zip(dx_d.mburst, dx_a.mburst): mburst_and.append(bursts_d.and_gate(bursts_a)) dx_and.add(mburst=mburst_and) pprint(" - Calculating burst periods ...", mute) dx_and._calc_burst_period() # writes bp pprint("[DONE]\n", mute) # Note: dx_and.bg_bs will not be meaningful dx_and.add(m=m, L=m, F=F, P=None, min_rate_cps=min_rate_cps, ph_sel='AND-gate') dx_and.add(bg_corrected=False, leakage_corrected=False, dir_ex_corrected=False, dithering=False) pprint(" - Counting D and A ph and calculating FRET ... \n", mute) dx_and.calc_fret(count_ph=True, corrections=True, mute=mute) pprint(" [DONE Counting D/A]\n", mute) return dx_and ## # Burst asymmetry # def get_burst_photons(d, ich=0, ph_sel=Ph_sel('all')): """Return a list of arrays of photon timestamps in each burst. Arguments: d (Data): Data() object ich (int): channel index ph_sel (Ph_sel): photon selection. It allows to select timestamps from a specific photon selection. Example ph_sel=Ph_sel(Dex='Dem'). See :mod:`fretbursts.ph_sel` for details. Returns: A list of arrays of photon timestamps (one array per burst). """ bursts = d.mburst[ich] i_start, i_end = bursts.istart, bursts.istop ph_times = d.get_ph_times(ich) burst_slices = [slice(i1, i2 + 1) for i1, i2 in zip(i_start, i_end)] burst_photons = [ph_times[slice_i] for slice_i in burst_slices] if ph_sel != Ph_sel('all'): ph_times_mask = d.get_ph_mask(ich, ph_sel=ph_sel) photon_masks = [ph_times_mask[slice_i] for slice_i in burst_slices] burst_photons = [ph[mask] for ph, mask in zip(burst_photons, photon_masks)] return burst_photons def ph_burst_stats(d, ich=0, func=np.mean, ph_sel=Ph_sel('all')): """Applies function `func` to the timestamps of each burst. Arguments: d (Data): Data() object ich (int): channel index func (function): a function that take an array of burst-timestamps and return a scalar. Default `numpy.mean`. ph_sel (Ph_sel): photon selection. It allows to select timestamps from a specific photon selection. Default Ph_sel('all'). See :mod:`fretbursts.ph_sel` for details. Returns: An array containing per-burst timestamp statistics. """ burst_photons = get_burst_photons(d, ich, ph_sel=ph_sel) stats = [func(times) for times in burst_photons] return np.array(stats) def asymmetry(dx, ich=0, func=np.mean, dropnan=True): """Compute an asymmetry index for each burst in channel `ich`. It computes each burst the difference func({t_D}) - func({t_A}) where `func` is a function (default `mean`) that computes some statistics on the timestamp and {t_D} and {t_A} are the sets of D or A timestamps in a bursts (during D excitation). Arguments: d (Data): Data() object ich (int): channel index func (function): the function to be used to extract D and A photon statistics in each bursts. Returns: An arrays of photon timestamps (one array per burst). """ stats_d = ph_burst_stats(dx, ich=ich, func=func, ph_sel=Ph_sel(Dex='Dem')) stats_a = ph_burst_stats(dx, ich=ich, func=func, ph_sel=Ph_sel(Dex='Aem')) #b_size = d.burst_sizes(ich, add_naa=False) #b_width = d.mburst[ich].width burst_asym = (stats_d - stats_a)*dx.clk_p*1e3 if dropnan: burst_asym = burst_asym[-np.isnan(burst_asym)] return burst_asym