python source code of tad

from __future__ import division
import numpy as np
import progressbar
import itertools
from future.utils import string_types
import warnings

import logging
logger = logging.getLogger(__name__)


class GenomicRegion(object):
    """
    Class representing a genomic region.

    .. attribute:: chromosome

        Name of the chromosome this region is located on

    .. attribute:: start

        Start position of the region in base pairs

    .. attribute:: end

        End position of the region in base pairs

    .. attribute:: strand

        Strand this region is on (+1, -1)

    .. attribute:: ix

        Index of the region in the context of all genomic
        regions.

    """

    def __init__(self, start, end, chromosome=None, ix=None):
        """
        Initialize this object.

        :param start: Start position of the region in base pairs
        :param end: End position of the region in base pairs
        :param chromosome: Name of the chromosome this region is located on
        :param ix: Index of the region in the context of all genomic
                   regions.
        """
        self.start = start
        self.end = end
        self.chromosome = chromosome
        self.ix = ix

    @classmethod
    def from_string(cls, region_string):
        """
        Convert a string into a :class:`~GenomicRegion`.

        This is a very useful convenience function to quickly
        define a :class:`~GenomicRegion` object from a descriptor
        string.

        :param region_string: A string of the form
                              <chromosome>[:<start>-<end>[:<strand>]]
                              (with square brackets indicating optional
                              parts of the string). If any optional
                              part of the string is omitted, intuitive
                              defaults will be chosen.
        :return: :class:`~GenomicRegion`
        """
        chromosome = None
        start = None
        end = None

        # strip whitespace
        no_space_region_string = "".join(region_string.split())
        fields = no_space_region_string.split(':')

        if len(fields) > 3:
            raise ValueError("Genomic range string must be of the form <chromosome>[:<start>-<end>:[<strand>]]")

        # there is chromosome information
        if len(fields) > 0:
            chromosome = fields[0]

        # there is range information
        if len(fields) > 1 and fields[1] != '':
            start_end_bp = fields[1].split('-')
            if len(start_end_bp) > 0:
                try:
                    start = int(start_end_bp[0])
                except ValueError:
                    raise ValueError("Start of genomic range must be integer")

            if len(start_end_bp) > 1:
                try:
                    end = int(start_end_bp[1])
                except ValueError:
                    raise ValueError("End of genomic range must be integer")

                if not end > start:
                    raise ValueError("The end coordinate must be bigger than the start.")

        return cls(start=start, end=end, chromosome=chromosome)

    def overlaps(self, region):
        """
        Check if this region overlaps with the specified region.

        :param region: :class:`~GenomicRegion` object or string
        """
        if isinstance(region, str):
            region = GenomicRegion.from_string(region)

        if region.chromosome != self.chromosome:
            return False

        if region.start <= self.end and region.end >= self.start:
            return True
        return False

    def contains(self, region):
        """
        Check if the specified region is completely contained in this region.

        :param region: :class:`~GenomicRegion` object or string
        """
        if isinstance(region, str):
            region = GenomicRegion.from_string(region)

        if region.chromosome != self.chromosome:
            return False

        if region.start >= self.start and region.end <= self.end:
            return True
        return False

    def _equals(self, region):
        if region.chromosome != self.chromosome:
            return False
        if region.start != self.start:
            return False
        if region.end != self.end:
            return False
        return True

    def __eq__(self, other):
        return self._equals(other)

    def __ne__(self, other):
        return not self._equals(other)

    def copy(self):
        """
        Make a copy of this GenomicRegion object.
        """
        return GenomicRegion(chromosome=self.chromosome, start=self.start, end=self.end)


def sub_regions(regions, region):
    """
    Get regions from a list the overlap with another region.

    :param regions: List of :class:`~GenomicRegion`
    :param region: :class:`~GenomicRegion` used for overlap calculation
    """
    if isinstance(region, string_types):
        region = GenomicRegion.from_string(region)

    sr = []
    start_ix = None
    end_ix = None
    for i, r in enumerate(regions):
        if r.chromosome == region.chromosome and r.start <= region.end and r.end >= region.start:
            if start_ix is None:
                start_ix = i
            end_ix = i
            sr.append(r.copy())
        else:
            if end_ix is not None:
                break

    if start_ix is None or end_ix is None:
        raise ValueError("Region not found in dataset! {}:{}-{}".format(region.chromosome, region.start, region.end))

    return sr, start_ix, end_ix


def sub_matrix_regions(hic_matrix, regions, region):
    """
    Get a square sub Hi-C matrix that overlaps a given region.

    :param hic_matrix: A square numpy array
    :param regions: List of :class:`~GenomicRegion`
    :param region: :class:`~GenomicRegion` used for overlap calculation
    """
    sr, start_ix, end_ix = sub_regions(regions, region)

    if start_ix is None:
        return np.empty((0, 0)), sr

    return np.copy(hic_matrix[start_ix:end_ix+1, start_ix:end_ix+1]), sr


def sub_data_regions(data, regions, region):
    """
    Get a sub data matrix that overlaps a given region.

    :param data: a numpy array
    :param regions: List of :class:`~GenomicRegion`
    :param region: :class:`~GenomicRegion` used for overlap calculation
    """
    sr, start_ix, end_ix = sub_regions(regions, region)

    if start_ix is None:
        return np.empty((0, 0)), sr

    if not isinstance(data, np.ndarray):
        data = np.array(data)

    return np.copy(data[:, start_ix:end_ix+1]), sr


def sub_vector_regions(data, regions, region):
    """
    Get a sub-vector that overlaps a given region

    :param data: a numpy vector
    :param regions: List of :class:`~GenomicRegion`
    :param region: :class:`~GenomicRegion` used for overlap calculation
    """
    sr, start_ix, end_ix = sub_regions(regions, region)

    if start_ix is None:
        return np.empty((0, 0)), sr

    if not isinstance(data, np.ndarray):
        data = np.array(data)

    return np.copy(data[start_ix:end_ix+1]), sr


def load_matrix(file_name, size=None, sep=None, square=True, ix_converter=None, region_range=None):
    try:  # numpy binary format
        m = np.load(file_name)
        if region_range is not None:
            s, e = region_range
            m = m[s:e+1, s:e+1].copy()
    except (IOError, ValueError):  # not an .npy file

        # check number of fields in file
        with open(file_name, 'r') as f:
            line = f.readline()
            while line.startswith('#'):
                line = f.readline()
            line = line.rstrip()
            n_fields = len(line.split(sep))

        if n_fields > 3 or not square:  # square matrix format
            m = np.loadtxt(file_name)
            if region_range is not None:
                s, e = region_range
                m = m[s:e + 1, s:e + 1].copy()
        else:
            if size is None:
                if region_range is not None:
                    size = region_range[1] - region_range[0] + 1
                else:
                    raise ValueError("Must provide matrix size when importing from sparse matrix notation "
                                     "(HicPro, etc)")

            m = np.zeros((size, size))
            with open(file_name, 'r') as f:
                for line in f:
                    if line.startswith("#"):
                        continue
                    line = line.rstrip()
                    fields = line.split(sep)
                    if ix_converter is None:
                        source, sink, weight = int(fields[0]), int(fields[1]), float(fields[2])
                    else:
                        source = ix_converter[fields[0]]
                        sink = ix_converter[fields[1]]
                        weight = float(fields[2])

                    if region_range is not None:
                        if source < region_range[0] or source > region_range[1]:
                            continue
                        if sink < region_range[0] or sink > region_range[1]:
                            continue
                        source = source - region_range[0]
                        sink = sink - region_range[0]

                    m[source, sink] = weight
                    m[sink, source] = weight

    if square and m.shape[0] != m.shape[1]:
        raise ValueError("Matrix dimensions do not match! ({})".format(m.shape))

    return m


def load_chromosome_matrix(file_name, regions, chromosome, **kwargs):
    region_range = [None, 0]
    for i, region in enumerate(regions):
        if region.chromosome == chromosome:
            if region_range[0] is None:
                region_range[0] = i
            region_range[0] = min(region_range[0], i)
            region_range[1] = max(region_range[1], i)
    logger.debug('Region range for {}: {} - {}'.format(chromosome, region_range[0], region_range[1]))
    return load_matrix(file_name, region_range=region_range, **kwargs)


def load_regions(file_name, sep=None):
    regions = []
    ix_converter = None
    with open(file_name, 'r') as f:
        for i, line in enumerate(f):
            line = line.rstrip()
            fields = line.split(sep)
            if len(fields) > 2:
                chromosome = fields[0]
                start = int(fields[1]) + 1
                end = int(fields[2])
                ix = i
                if len(fields) > 3 and fields[3] != '.':  # HicPro
                    if ix_converter is None:
                        ix_converter = dict()
                    if fields[3] in ix_converter:
                        raise ValueError("name column in region BED must "
                                         "only contain unique values! ({})".format(fields[3]))
                    ix_converter[fields[3]] = ix
                regions.append(GenomicRegion(chromosome=chromosome, start=start, end=end, ix=ix))
    return regions, ix_converter


def _get_boundary_distances(regions):
    """
    Return the distances of each region to the boundaries of its chromosome.
    """
    n_bins = len(regions)
    # find distances to chromosome boundaries in bins
    boundary_dist = np.zeros(n_bins, dtype=int)
    last_chromosome = None
    last_chromosome_index = 0
    for i, region in enumerate(regions):
        chromosome = region.chromosome
        if last_chromosome is not None and chromosome != last_chromosome:
            chromosome_length = i-last_chromosome_index
            for j in range(chromosome_length):
                boundary_dist[last_chromosome_index+j] = min(j, i-last_chromosome_index-1-j)
            last_chromosome_index = i
        last_chromosome = chromosome
    chromosome_length = n_bins-last_chromosome_index
    for j in range(chromosome_length):
        boundary_dist[last_chromosome_index+j] = min(j, n_bins-last_chromosome_index-1-j)

    return boundary_dist


def directionality_index(hic, regions=None, window_size=2000000):
    """
    Calculate the directionality index as in Dixon et al. (2012).

    :param hic: A Hi-C matrix in the form of a square numpy array
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    :param window_size: A window size in base pairs
    """
    if regions is None:
        for i in range(hic.shape[0]):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    bin_size = regions[0].end-regions[0].start+1
    bin_distance = int(window_size/bin_size)
    if window_size % bin_size > 0:
        bin_distance += 1

    n_bins = len(regions)
    boundary_dist = _get_boundary_distances(regions)

    left_sums = np.zeros(n_bins)
    right_sums = np.zeros(n_bins)
    for source in range(hic.shape[0]):
        for sink in range(source, hic.shape[1]):
            try:
                if hic.mask[source, sink]:
                    continue
            except AttributeError:
                pass

            weight = hic[source, sink]

            if source == sink:
                continue
            if sink - source <= bin_distance:
                if boundary_dist[sink] >= sink-source:
                    left_sums[sink] += weight
                if boundary_dist[source] >= sink-source:
                    right_sums[source] += weight

    di = np.zeros(n_bins)
    for i in range(n_bins):
        A = left_sums[i]
        B = right_sums[i]
        E = (A+B)/2
        if E != 0 and B-A != 0:
            di[i] = ((B-A)/abs(B-A)) * ((((A-E)**2)/E) + (((B-E)**2)/E))

    return di


def masked_matrix(matrix, all_zero=False):
    """
    Returns masked version of HicMatrix. By default, all entries in zero-count
    rows and columns are masked.

    :param matrix: A numpy 2D matrix
    :param all_zero: Mask ALL zero-count entries
    :returns: MaskedArray with zero entries masked
    """
    if all_zero:
        return np.ma.MaskedArray(matrix, mask=np.isclose(matrix, 0.))
    col_zero = np.isclose(np.sum(matrix, axis=0), 0.)
    row_zero = np.isclose(np.sum(matrix, axis=1), 0.)
    mask = np.zeros(matrix.shape, dtype=np.bool_)
    mask[:, col_zero] = np.True_
    mask[row_zero, :] = np.True_
    return np.ma.MaskedArray(matrix, mask=mask)


def kth_diag_indices(n, k):
    """
    Return indices of bins k steps away from the diagonal.
    (from http://stackoverflow.com/questions/10925671/numpy-k-th-diagonal-indices)
    """

    rows, cols = np.diag_indices(n)
    if k < 0:
        return rows[:k], cols[-k:]
    elif k > 0:
        return rows[k:], cols[:-k]
    else:
        return rows, cols


def impute_missing_bins(hic_matrix, regions=None, per_chromosome=True, stat=np.ma.mean):
    """
    Impute missing contacts in a Hi-C matrix.

    For inter-chromosomal data uses the mean of all inter-chromosomal contacts,
    for intra-chromosomal data uses the mean of intra-chromosomal counts at the corresponding diagonal.

    :param hic_matrix: A square numpy array
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    :param per_chromosome: Do imputation on a per-chromosome basis (recommended)
    :param stat: The aggregation statistic to be used for imputation, defaults to the mean.
    """
    if regions is None:
        for i in range(hic_matrix.shape[0]):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    chr_bins = dict()
    for i, region in enumerate(regions):
        if region.chromosome not in chr_bins:
            chr_bins[region.chromosome] = [i, i]
        else:
            chr_bins[region.chromosome][1] = i

    n = len(regions)
    if not hasattr(hic_matrix, "mask"):
        hic_matrix = masked_matrix(hic_matrix)

    imputed = hic_matrix.copy()
    if per_chromosome:
        for c_start, c_end in chr_bins.itervalues():
            # Correcting intrachromoc_startmal contacts by mean contact count at each diagonal
            for i in range(c_end - c_start):
                ind = kth_diag_indices(c_end - c_start, -i)
                diag = imputed[c_start:c_end, c_start:c_end][ind]
                diag[diag.mask] = stat(diag)
                imputed[c_start:c_end, c_start:c_end][ind] = diag
            # Correcting interchromoc_startmal contacts by mean of all contact counts between
            # each set of chromoc_startmes
            for other_start, other_end in chr_bins.itervalues():
                # Only correct upper triangle
                if other_start <= c_start:
                    continue
                inter = imputed[c_start:c_end, other_start:other_end]
                inter[inter.mask] = stat(inter)
                imputed[c_start:c_end, other_start:other_end] = inter
    else:
        for i in range(n):
            diag = imputed[kth_diag_indices(n, -i)]
            diag[diag.mask] = stat(diag)
            imputed[kth_diag_indices(n, -i)] = diag
    # Copying upper triangle to lower triangle
    imputed[np.tril_indices(n)] = imputed.T[np.tril_indices(n)]
    return imputed


def _apply_sliding_func(a, window, func=np.ma.mean):
    """
    Apply function on a sliding window over an array, ignoring Numpy NaN values.

    :param a: Numpy array on which function is applied
    :param window: The sliding window is i - window:i + window + 1
                   so total window is twice this parameter.
    :param func: Aggregation function to apply
    """
    out = np.empty(len(a))
    for i in range(len(a)):
        window_start = max(0, i - window)
        window_end = min(len(a), i + window + 1)
        cur_window = np.array(a[window_start:window_end])
        out[i] = func(cur_window[~np.isnan(cur_window)])
    return out


def insulation_index(hic_matrix, regions=None, window_size=500000, relative=False, mask_thresh=.5,
                     aggr_func=np.nanmean, impute_missing=False, normalize=False,
                     normalization_window=None, gradient=False):
    """
    Calculate the insulation index as in Crane et al. (2015)

    :param hic_matrix: A square numpy array
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    :param window_size: A window size in base pairs
    :param relative: Calculate insulation index relative to upstream and downstream contact strength
    :param mask_thresh: Set the threshold for masked value threshold
    :param aggr_func: Aggregation function used for index calculation, defaults to the mean
    :param impute_missing: Impute missing values, defaults to False
    :param normalize: Normalize index at each region to the mean number of contacts across a larger region
                      and log2-transform data
    :param normalization_window: Window (in number of regions) to normalize contacts to (see normalize).
                                 If None, the whole chromosome will be used for normalization.
    :param gradient: Return the first derivative of the insulation index
    """
    if regions is None:
        for i in range(hic_matrix.shape[0]):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    bin_size = regions[0].end-regions[0].start+1
    d = int(window_size/bin_size)
    if window_size % bin_size > 0:
        d += 1

    chr_bins = dict()
    for i, region in enumerate(regions):
        if region.chromosome not in chr_bins:
            chr_bins[region.chromosome] = [i, i]
        else:
            chr_bins[region.chromosome][1] = i

    if impute_missing:
        hic_matrix = impute_missing_bins(hic_matrix, regions=regions)

    is_masked = False
    if hasattr(hic_matrix, 'mask'):
        is_masked = True

    skipped = 0
    last_chromosome = None
    ins_by_chromosome = []
    for i, r in enumerate(regions):
        if r.chromosome != last_chromosome:
            last_chromosome = r.chromosome
            ins_by_chromosome.append(list())

        # too close to chromosome edge
        if (i - chr_bins[r.chromosome][0] < d or
                chr_bins[r.chromosome][1] - i <= d + 1):
            ins_by_chromosome[-1].append(np.nan)
            # ins_matrix[i] = np.nan
            continue

        # masked region
        if is_masked and hic_matrix.mask[i, i]:
            ins_by_chromosome[-1].append(np.nan)
            # ins_matrix[i] = np.nan
            continue

        up_rel_slice = (slice(i - d, i), slice(i - d, i))
        down_rel_slice = (slice(i + 1, i + d + 1), slice(i + 1, i + d + 1))
        ins_slice = (slice(i + 1, i + d + 1), slice(i - d, i))

        if (is_masked and ((relative and np.sum(hic_matrix.mask[up_rel_slice]) > d*d*mask_thresh) or
                (relative and np.sum(hic_matrix.mask[down_rel_slice]) > d*d*mask_thresh) or
                np.sum(hic_matrix.mask[ins_slice]) > d*d*mask_thresh)):
            # If too close to the edge of chromosome or
            # if more than half of the entries in this quadrant are masked (unmappable)
            # exclude it from the analysis
            skipped += 1
            ins_by_chromosome[-1].append(np.nan)
            # ins_matrix[i] = np.nan
            continue

        if not relative:
            if impute_missing and is_masked:
                s = aggr_func(hic_matrix[ins_slice].data)
                ins_by_chromosome[-1].append(s)
                # ins_matrix[i] = s
            else:
                s = aggr_func(hic_matrix[ins_slice])
                ins_by_chromosome[-1].append(s)
                # ins_matrix[i] = s
        else:
            if not impute_missing and not is_masked:
                s = (aggr_func(hic_matrix[ins_slice]) /
                     aggr_func(np.ma.dstack((hic_matrix[up_rel_slice],
                                             hic_matrix[down_rel_slice]))))
                # ins_matrix[i] = s
                ins_by_chromosome[-1].append(s)
            else:
                s = (aggr_func(hic_matrix[ins_slice].data) /
                     aggr_func(np.ma.dstack((hic_matrix[up_rel_slice].data,
                                             hic_matrix[down_rel_slice].data))))
                # ins_matrix[i] = s
                ins_by_chromosome[-1].append(s)

    if normalize:
        for i in range(len(ins_by_chromosome)):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                if normalization_window is not None:
                    ins_by_chromosome[i] = np.ma.log2(ins_by_chromosome[i]/_apply_sliding_func(
                        ins_by_chromosome[i], normalization_window, func=np.nanmean))
                else:
                    ins_by_chromosome[i] = np.ma.log2(ins_by_chromosome[i]/np.nanmean(ins_by_chromosome[i]))
    if gradient:
        for i in range(len(ins_by_chromosome)):
            ins_by_chromosome[i] = np.gradient(ins_by_chromosome[i])

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ins_matrix = np.array(list(itertools.chain.from_iterable(ins_by_chromosome)))

    return ins_matrix


def normalised_insulation_index(hic_matrix, regions=None, window_size=500000, normalisation_window=None):
    return insulation_index(hic_matrix, regions=regions, window_size=window_size, normalize=True,
                            normalization_window=normalisation_window)


def data_array(hic_matrix, regions, tad_method=insulation_index,
               window_sizes=None, **kwargs):
    """
    Calculate an index for a range of window sizes.

    :param hic_matrix: A square numpy array
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    :param tad_method: The method used for index calculation
    :param window_sizes: A list of window sizes in base pairs
    :param kwargs: Parameters passed to tad_method
    """
    if window_sizes is None:
        window_sizes = [int(i) for i in np.logspace(4, 6, 100)]

    if regions is None:
        for i in range(hic_matrix.shape[0]):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    try:
        l = len(window_sizes)
    except TypeError:
        l = None

    pb = progressbar.ProgressBar(max_value=l)
    if l is not None:
        pb.start()

    ws = []
    da = []
    for i, window_size in enumerate(window_sizes):
        if l is not None:
            pb.update(i)
        ws.append(window_size)
        da.append(tad_method(hic_matrix, regions, window_size, **kwargs))

    if l is not None:
        pb.finish()

    return np.array(da), ws


def _border_type(i, values):
    """
    Returns border type:

    :param i: index of potential border
    :param values: insulation index values
    :return: if border: 1 or -1, else 0. 1 if derivative at border is >0, -1 otherwise
    """

    if i == 0:
        return 1
    if i == len(values)-1:
        return -1

    if values[i-1] <= 0 <= values[i+1]:
        return 1
    if values[i+1] <= 0 <= values[i-1]:
        return -1
    return 0


def call_tad_borders(ii_results, cutoff=0):
    """
    Calls TAD borders using the first derivative of the insulation index.

    :param ii_results: (raw) insulation index results, numpy vector
    :param cutoff: raw insulation index threshold for "true" TAD boundaries
    """
    tad_borders = []
    g = np.gradient(ii_results)
    for i in range(len(ii_results)):
        border_type = _border_type(i, g)
        if border_type == 1 and ii_results[i] <= cutoff:
            tad_borders.append(i)
    return tad_borders


def call_tads_insulation_index(ii_results, cutoff, regions=None):
    """
    Call TADs from insulation index vector.

    :param ii_results: (raw) insulation index results, numpy vector
    :param cutoff: raw insulation index threshold for "true" TADs
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    """
    if regions is None:
        regions = []
        for i in range(len(ii_results)):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    if len(regions) != len(ii_results):
        raise ValueError("Insulation index results and regions must be the same size!")

    borders = call_tad_borders(ii_results, cutoff=cutoff)

    tad_regions = []
    previous = None
    for border in borders:
        if previous is not None:
            found_max = False
            for j in range(previous, border+1):
                if ii_results[j] >= cutoff:
                    found_max = True

            if found_max:
                first_region = regions[previous]
                second_region = regions[border]
                if first_region.chromosome != second_region.chromosome:
                    previous = border
                    continue
                tad_regions.append(GenomicRegion(chromosome=first_region.chromosome,
                                                 start=first_region.start, end=second_region.end))

        previous = border
    return tad_regions


def call_tads_directionality_index(di_results, cutoff, regions=None):
    """
    Call TADs from directionality index vector.

    :param di_results: (raw) directionality index results, numpy vector
    :param cutoff: raw directionality index threshold for "true" biases
    :param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
    """
    if regions is None:
        regions = []
        for i in range(len(di_results)):
            regions.append(GenomicRegion(chromosome='', start=i, end=i))

    tad_regions = []
    last_upstream_bias_region = None
    last_downstream_bias_region = None
    for i, value in enumerate(di_results):
        current_region = regions[i]
        if last_upstream_bias_region is not None and current_region.chromosome != last_upstream_bias_region.chromosome:
            tad_regions.append(GenomicRegion(chromosome=last_upstream_bias_region.chromosome,
                                             start=last_upstream_bias_region.start,
                                             end=regions[i-1].end))
            last_upstream_bias_region = None
            last_downstream_bias_region = None

        # look for upstream bias site
        if value >= cutoff:
            # side-by-side TAD
            if last_downstream_bias_region is not None and last_upstream_bias_region is not None:
                tad = GenomicRegion(chromosome=last_downstream_bias_region.chromosome,
                                    start=last_upstream_bias_region.start,
                                    end=last_downstream_bias_region.end)
                tad_regions.append(tad)
                last_upstream_bias_region = None
                last_downstream_bias_region = None

            # beginning of TAD
            if last_upstream_bias_region is None:
                last_upstream_bias_region = current_region

        if value <= -1*cutoff:
            # potential end of TAD
            if last_upstream_bias_region is not None:
                last_downstream_bias_region = current_region

        if abs(value) < cutoff:
            # back to zero
            if last_downstream_bias_region is not None and last_upstream_bias_region is not None:
                tad = GenomicRegion(chromosome=last_downstream_bias_region.chromosome,
                                    start=last_upstream_bias_region.start,
                                    end=last_downstream_bias_region.end)
                tad_regions.append(tad)
                last_upstream_bias_region = None
                last_downstream_bias_region = None
    return tad_regions