python source code of thresholding

"""Utilities for probabilistic error control at voxel- and
cluster-level in brain imaging: cluster-level thresholding, false
discovery rate control, false discovery proportion in clusters.

Author: Bertrand Thirion, 2015 -- 2019

"""
import warnings

import numpy as np

from nilearn.input_data import NiftiMasker
from nilearn.image import math_img
from scipy.ndimage import label
from scipy.stats import norm

from nistats.utils import get_data


def _compute_hommel_value(z_vals, alpha, verbose=False):
    """Compute the All-Resolution Inference hommel-value"""
    if alpha < 0 or alpha > 1:
        raise ValueError('alpha should be between 0 and 1')
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)

    if len(p_vals) == 1:
        return p_vals[0] > alpha
    if p_vals[0] > alpha:
        return n_samples
    slopes = (alpha - p_vals[: - 1]) / np.arange(n_samples, 1, -1)
    slope = np.max(slopes)
    hommel_value = np.trunc(n_samples + (alpha - slope * n_samples) / slope)
    if verbose:
        try:
            from matplotlib import pyplot as plt
        except ImportError:
            warnings.warn('"verbose" option requires the package Matplotlib.'
                          'Please install it using `pip install matplotlib`.')
        else:
            plt.figure()
            plt.plot(p_vals, 'o')
            plt.plot([n_samples - hommel_value, n_samples], [0, alpha])
            plt.plot([0, n_samples], [0, 0], 'k')
            plt.show(block=False)
    return np.minimum(hommel_value, n_samples)


def _true_positive_fraction(z_vals, hommel_value, alpha):
    """Given a bunch of z-avalues, return the true positive fraction

    Parameters
    ----------
    z_vals: array,
            a set of z-variates from which the FDR is computed
    hommel_value: int,
           the Hommel value, used in the computations
    alpha: float,
           desired FDR control

    Returns
    -------
    threshold: float,
               Estimated true positive fraction in the set of values
    """
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)
    c = np.ceil((hommel_value * p_vals) / alpha)
    unique_c, counts = np.unique(c, return_counts=True)
    criterion = 1 - unique_c + np.cumsum(counts)
    proportion_true_discoveries = np.maximum(0, criterion.max() / n_samples)
    return proportion_true_discoveries


def fdr_threshold(z_vals, alpha):
    """ return the Benjamini-Hochberg FDR threshold for the input z_vals

    Parameters
    ----------
    z_vals: array,
            a set of z-variates from which the FDR is computed
    alpha: float,
           desired FDR control

    Returns
    -------
    threshold: float,
               FDR-controling threshold from the Benjamini-Hochberg procedure
    """
    if alpha < 0 or alpha > 1:
        raise ValueError(
            'alpha should be between 0 and 1. {} was provided'.format(alpha))
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)
    pos = p_vals < alpha * np.linspace(
        .5 / n_samples, 1 - .5 / n_samples, n_samples)
    if pos.any():
        return (z_vals_[pos][-1] - 1.e-12)

    return np.infty


def cluster_level_inference(stat_img, mask_img=None,
                            threshold=3., alpha=.05, verbose=False):
    """ Report the proportion of active voxels for all clusters
    defined by the input threshold.

    Parameters
    ----------
    stat_img : Niimg-like object or None, optional
       statistical image (presumably in z scale)

    mask_img : Niimg-like object, optional,
        mask image

    threshold: list of floats, optional
       cluster-forming threshold in z-scale.

    alpha: float or list, optional
        level of control on the true positive rate, aka true dsicovery
        proportion

    verbose: bool, optional
        verbosity mode

    Returns
    -------
    proportion_true_discoveries_img: Nifti1Image,
        the statistical map that gives the true positive

    Note
    ----
    This implements the method described in:

    Rosenblatt JD, Finos L, Weeda WD, Solari A, Goeman JJ. All-Resolutions
    Inference for brain imaging. Neuroimage. 2018 Nov 1;181:786-796. doi:
    10.1016/j.neuroimage.2018.07.060
    """
    if not isinstance(threshold, list):
        threshold = [threshold]

    if mask_img is None:
        masker = NiftiMasker(mask_strategy='background').fit(stat_img)
    else:
        masker = NiftiMasker(mask_img=mask_img).fit()
    stats = np.ravel(masker.transform(stat_img))
    hommel_value = _compute_hommel_value(stats, alpha, verbose=verbose)

    # embed it back to 3D grid
    stat_map = get_data(masker.inverse_transform(stats))

    # Extract connected components above threshold
    proportion_true_discoveries_img = math_img('0. * img', img=stat_img)
    proportion_true_discoveries = masker.transform(
        proportion_true_discoveries_img).ravel()

    for threshold_ in sorted(threshold):
        label_map, n_labels = label(stat_map > threshold_)
        labels = label_map[get_data(masker.mask_img_) > 0]
        for label_ in range(1, n_labels + 1):
            # get the z-vals in the cluster
            cluster_vals = stats[labels == label_]
            proportion = _true_positive_fraction(cluster_vals, hommel_value,
                                                 alpha)
            proportion_true_discoveries[labels == label_] = proportion

    proportion_true_discoveries_img = masker.inverse_transform(
        proportion_true_discoveries)
    return proportion_true_discoveries_img


def map_threshold(stat_img=None, mask_img=None, alpha=.001, threshold=3.,
                  height_control='fpr', cluster_threshold=0, two_sided=True):
    """ Compute the required threshold level and return the thresholded map

    Parameters
    ----------
    stat_img : Niimg-like object or None, optional
       statistical image (presumably in z scale)
       whenever height_control is 'fpr' or None,
       stat_img=None is acceptable.
       If it is 'fdr' or 'bonferroni', an error is raised if stat_img is None.

    mask_img : Niimg-like object, optional,
        mask image

    alpha: float or list, optional
        number controlling the thresholding (either a p-value or q-value).
        Its actual meaning depends on the height_control parameter.
        This function translates alpha to a z-scale threshold.

    threshold: float, optional
       desired threshold in z-scale.
       This is used only if height_control is None

    height_control: string, or None optional
        false positive control meaning of cluster forming
        threshold: None|'fpr'|'fdr'|'bonferroni'

    cluster_threshold: float, optional
        cluster size threshold. In the returned thresholded map,
        sets of connected voxels (`clusters`) with size smaller
        than this number will be removed.

    two_sided: Bool, optional,
        Whether the thresholding should yield both positive and negative
        part of the maps.
        In that case, alpha is corrected by a factor of 2.
        Defaults to True.

    Returns
    -------
    thresholded_map : Nifti1Image,
        the stat_map thresholded at the prescribed voxel- and cluster-level

    threshold: float,
        the voxel-level threshold used actually

    Note
    ----
    If the input image is not z-scaled (i.e. some z-transformed statistic)
    the computed threshold is not rigorous and likely meaningless
    """
    height_control_methods = ['fpr', 'fdr', 'bonferroni',
                              'all-resolution-inference', None]
    if height_control not in height_control_methods:
        raise ValueError(
            "height control should be one of {0}", height_control_methods)

    # if two-sided, correct alpha by a factor of 2
    alpha_ = alpha / 2 if two_sided else alpha

    # if height_control is 'fpr' or None, we don't need to look at the data
    # to compute the threshold
    if height_control == 'fpr':
        threshold = norm.isf(alpha_)

    # In this case, and if stat_img is None, we return
    if stat_img is None:
        if height_control in ['fpr', None]:
            return None, threshold
        else:
            raise ValueError('Map_threshold requires stat_img not to be None'
                             'when the height_control procedure '
                             'is "bonferroni" or "fdr"')

    if mask_img is None:
        masker = NiftiMasker(mask_strategy='background').fit(stat_img)
    else:
        masker = NiftiMasker(mask_img=mask_img).fit()
    stats = np.ravel(masker.transform(stat_img))
    n_voxels = np.size(stats)

    # Thresholding
    if two_sided:
        # replace stats by their absolute value after storing the sign
        sign = np.sign(stats)
        stats = np.abs(stats)

    if height_control == 'fdr':
        threshold = fdr_threshold(stats, alpha_)
    elif height_control == 'bonferroni':
        threshold = norm.isf(alpha_ / n_voxels)
    stats *= (stats > threshold)
    if two_sided:
        stats *= sign

    # embed it back to 3D grid
    stat_map = get_data(masker.inverse_transform(stats))

    # Extract connected components above threshold
    label_map, n_labels = label(np.abs(stat_map) > threshold)
    labels = label_map[get_data(masker.mask_img_) > 0]

    for label_ in range(1, n_labels + 1):
        if np.sum(labels == label_) < cluster_threshold:
            stats[labels == label_] = 0

    return masker.inverse_transform(stats), threshold