Python torch.cumsum() Examples

The following are 30 code examples of torch.cumsum(), collected from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
Example #1
Source File: TE.py    From DEEPSEC with MIT License
def thermometer_encoding(samples=None, level=None, device=None):
    """
    the help function to encode the samples using the thermometer encoding schema
    :param samples:
    :param level:
    :param device:
    :return:
    """
    assert level is not None and isinstance(level, int), 'level must be specified as an integer'
    assert torch.is_tensor(samples), "input samples must be a PyTorch tensor"
    if len(samples.shape) >= 4 and (samples.shape[1] == 1 or samples.shape[1] == 3):
        samples = samples.permute(0, 2, 3, 1)

    # convert one hot encoding to thermometer encoding
    one_hot_samples = one_hot_encoding(samples=samples, level=level, device=device)
    therm_samples = torch.cumsum(one_hot_samples, dim=-1)

    # the returned samples are NumPy data with shape [BatchSize, Channel * Level, Height, Width]
    shape = samples.shape
    therm_samples_numpy = torch.reshape(therm_samples, (shape[0], shape[1], shape[2], shape[3] * level))
    therm_samples_numpy = therm_samples_numpy.permute(0, 3, 1, 2).cpu().numpy()

    return therm_samples_numpy 
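The key step above is the cumsum call: a cumulative sum along the level dimension turns a one-hot code into a thermometer code. A minimal, self-contained sketch of just that step (independent of the DEEPSEC helpers, with a hypothetical quantization into `level` buckets):

import torch

level = 4
x = torch.tensor([0.10, 0.60, 0.90])                       # values in [0, 1]
bucket = torch.clamp((x * level).long(), max=level - 1)    # quantize into `level` buckets
one_hot = torch.nn.functional.one_hot(bucket, num_classes=level).float()
therm = torch.cumsum(one_hot, dim=-1)
# e.g. the one-hot row [0., 0., 1., 0.] becomes the thermometer row [0., 0., 1., 1.]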
Example #2
Source File: tu.py    From pytorch_geometric with MIT License
def split(data, batch):
    node_slice = torch.cumsum(torch.from_numpy(np.bincount(batch)), 0)
    node_slice = torch.cat([torch.tensor([0]), node_slice])

    row, _ = data.edge_index
    edge_slice = torch.cumsum(torch.from_numpy(np.bincount(batch[row])), 0)
    edge_slice = torch.cat([torch.tensor([0]), edge_slice])

    # Edge indices should start at zero for every graph.
    data.edge_index -= node_slice[batch[row]].unsqueeze(0)
    data.__num_nodes__ = torch.bincount(batch).tolist()

    slices = {'edge_index': edge_slice}
    if data.x is not None:
        slices['x'] = node_slice
    if data.edge_attr is not None:
        slices['edge_attr'] = edge_slice
    if data.y is not None:
        if data.y.size(0) == batch.size(0):
            slices['y'] = node_slice
        else:
            slices['y'] = torch.arange(0, batch[-1] + 2, dtype=torch.long)

    return data, slices 
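The idiom here, cumsum over per-graph node counts with a prepended zero, yields slice boundaries: the nodes of graph g occupy [node_slice[g], node_slice[g + 1]). A small illustration of just that step:

import torch

batch = torch.tensor([0, 0, 0, 1, 1, 2])                  # graph id of every node
node_slice = torch.cumsum(torch.bincount(batch), 0)
node_slice = torch.cat([torch.tensor([0]), node_slice])
print(node_slice)                                         # tensor([0, 3, 5, 6])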
Example #3
Source File: BinaryTreeBasedModule.py    From latent-treelstm with MIT License
def _merge(actions, h_l, c_l, h_r, c_r, h_p, c_p, mask):
        """
        This method merges the left and right TreeLSTM states. It reuses already precomputed states for the
        parent node, but still has to apply the correct masking.
        """
        cumsum = torch.cumsum(actions, dim=-1)
        mask_l = (1.0 - cumsum)[..., None]
        mask_r = (cumsum - actions)[..., None]
        mask = mask[..., None]
        actions = actions[..., None]
        # If a row of the mask matrix is zero, ignore everything calculated so far and copy the corresponding left
        # hidden states from the previous layer (the assumption is that padding tokens are added on the right, and
        # an action that would use a padding token cannot be sampled while the corresponding mask row is nonzero).
        # Eventually, the leftmost state ends up on top, holding the correct required value.
        h_p = (mask_l * h_l + actions * h_p + mask_r * h_r) * mask + h_l * (1. - mask)
        c_p = (mask_l * c_l + actions * c_p + mask_r * c_r) * mask + c_l * (1. - mask)
        return h_p, c_p 
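To see how the cumsum builds the masks, take a single merge action as a one-hot vector: positions left of the merge get mask_l, the merge position itself gets actions, and positions to its right get mask_r. A sketch of the arithmetic on one row:

import torch

actions = torch.tensor([0., 0., 1., 0., 0.])   # merge at position 2
cumsum = torch.cumsum(actions, dim=-1)         # tensor([0., 0., 1., 1., 1.])
mask_l = 1.0 - cumsum                          # tensor([1., 1., 0., 0., 0.]) -- selects left states
mask_r = cumsum - actions                      # tensor([0., 0., 0., 1., 1.]) -- selects right states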
Example #4
Source File: functions.py    From fairseq with MIT License
def safe_cumprod(tensor, dim: int, eps: float = 1e-10):
    """
    An implementation of cumprod to prevent precision issues.
    cumprod(x)
    = [x1, x1x2, x1x2x3, ....]
    = [exp(log(x1)), exp(log(x1) + log(x2)), exp(log(x1) + log(x2) + log(x3)), ...]
    = exp(cumsum(log(x)))
    """

    if (tensor + eps < 0).any().item():
        raise RuntimeError(
            "Safe cumprod can only take non-negative tensors as input. "
            "Consider using torch.cumprod if you want to calculate negative values."
        )

    log_tensor = torch.log(tensor + eps)
    cumsum_log_tensor = torch.cumsum(log_tensor, dim)
    exp_cumsum_log_tensor = torch.exp(cumsum_log_tensor)
    return exp_cumsum_log_tensor 
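A quick usage sketch, assuming safe_cumprod from above is in scope; both results agree up to the eps added inside the log:

import torch

x = torch.tensor([0.9, 0.5, 0.8])
print(torch.cumprod(x, dim=0))    # tensor([0.9000, 0.4500, 0.3600])
print(safe_cumprod(x, dim=0))     # approximately the same, computed as exp(cumsum(log(x + eps)))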
Example #5
Source File: train.py    From pytorch-asr with GNU General Public License v3.0
def validate(self, data_loader):
        "validate with label error rate by the edit distance between hyps and refs"
        self.model.eval()
        with torch.no_grad():
            N, D = 0, 0
            t = tqdm(enumerate(data_loader), total=len(data_loader), desc="validating")
            for i, (data) in t:
                xs, ys, frame_lens, label_lens, filenames, texts = data
                if self.use_cuda:
                    xs = xs.cuda()
                ys_hat = self.model(xs)
                # convert likelihoods to CTC labels
                frame_lens = torch.ceil(frame_lens.float() / FRAME_REDUCE_FACTOR).int()
                hyps = [onehot2int(yh[:s]).squeeze() for yh, s in zip(ys_hat, frame_lens)]
                hyps = [remove_duplicates(h, blank=0) for h in hyps]
                # slice the targets
                pos = torch.cat((torch.zeros((1, ), dtype=torch.long), torch.cumsum(label_lens, dim=0)))
                refs = [ys[s:l] for s, l in zip(pos[:-1], pos[1:])]
                # calculate ler
                N += self.edit_distance(refs, hyps)
                D += sum(len(r) for r in refs)
                ler = N * 100. / D
                t.set_description(f"validating (LER: {ler:.2f} %)")
                t.refresh()
            logger.info(f"validating at epoch {self.epoch:03d}: LER {ler:.2f} %") 
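The target-slicing idiom above, prepend a zero to cumsum(label_lens) and pair consecutive offsets, splits one flat target vector back into per-utterance references. In isolation:

import torch

ys = torch.tensor([3, 1, 4, 1, 5, 9, 2])       # concatenated label sequences
label_lens = torch.tensor([3, 2, 2])
pos = torch.cat((torch.zeros((1,), dtype=torch.long), torch.cumsum(label_lens, dim=0)))
refs = [ys[s:e] for s, e in zip(pos[:-1], pos[1:])]
# [tensor([3, 1, 4]), tensor([1, 5]), tensor([9, 2])]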
Example #6
Source File: bitcoding.py    From L3C-PyTorch with GNU General Public License v3.0
def _get_cdf_from_pr(pr):
    """
    :param pr: NHWL
    :return: NHW(L+1) as int16 on CPU!
    """
    N, H, W, _ = pr.shape

    precision = 16

    cdf = torch.cumsum(pr, -1)
    cdf = cdf.mul_(2**precision)
    cdf = cdf.round()
    cdf = torch.cat((torch.zeros((N, H, W, 1), dtype=cdf.dtype, device=cdf.device),
                     cdf), dim=-1)
    cdf = cdf.to('cpu', dtype=torch.int16, non_blocking=True)

    return cdf 
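The essential move is that cumsum over a probability vector gives its CDF, and prepending a zero yields the L+1 bin edges an entropy coder needs. Stripped of the fixed-point scaling and the NHW batch dimensions:

import torch

pr = torch.tensor([[0.25, 0.25, 0.5]])             # one L-vector of probabilities
cdf = torch.cumsum(pr, -1)                         # tensor([[0.2500, 0.5000, 1.0000]])
cdf = torch.cat((torch.zeros(1, 1), cdf), dim=-1)  # tensor([[0.0000, 0.2500, 0.5000, 1.0000]])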
Example #7
Source File: trainer.py    From pytorch-asr with GNU General Public License v3.0
def unit_validate(self, data):
        xs, ys, frame_lens, label_lens, filenames, _ = data
        if self.use_cuda:
            xs = xs.cuda(non_blocking=True)
        ys_hat = self.model(xs)
        if self.fp16:
            ys_hat = ys_hat.float()
        pos = torch.cat((torch.zeros((1, ), dtype=torch.long), torch.cumsum(frame_lens, dim=0)))
        ys_hat = [ys_hat.narrow(0, p, l).clone() for p, l in zip(pos[:-1], frame_lens)]
        # convert likelihoods to CTC labels
        hyps = [onehot2int(yh[:s]).squeeze() for yh, s in zip(ys_hat, frame_lens)]
        hyps = [remove_duplicates(h, blank=0) for h in hyps]
        # slice the targets
        pos = torch.cat((torch.zeros((1, ), dtype=torch.long), torch.cumsum(label_lens, dim=0)))
        refs = [ys[s:l] for s, l in zip(pos[:-1], pos[1:])]
        return hyps, refs 
Example #8
Source File: scatter.py    From torchsupport with MIT License
def reduced_sequential(module, data, indices, out=None, dim_size=None):
  packed, pack_indices, counts = pack(data, indices)
  result, hidden = module(packed)
  last = torch.cumsum(counts, dim=0) - 1

  if dim_size is None:
    dim_size = indices.max() + 1
  if out is None:
    out = torch.zeros(
      dim_size, *result.shape[1:],
      dtype=data.dtype, device=data.device
    )
  out_hidden = torch.zeros_like(out)
  out[pack_indices] += result.data[last]
  out_hidden[pack_indices] += hidden.data[0]

  return out, out_hidden 
Example #9
Source File: scatter.py    From torchsupport with MIT License
def pairwise_no_pad(op, data, indices):
  # apply `op` to every ordered pair (x_i, x_j) with i < j within each group given
  # by `indices`, producing a flat result without padding groups to a common size
  unique, counts = indices.unique(return_counts=True)
  expansion = torch.cumsum(counts, dim=0)
  expansion = torch.repeat_interleave(expansion, counts)
  offset = torch.arange(0, counts.sum(), device=data.device)
  expansion = expansion - offset - 1
  expanded = torch.repeat_interleave(data, expansion.to(data.device), dim=0)

  expansion_offset = counts.roll(1)
  expansion_offset[0] = 0
  expansion_offset = expansion_offset.cumsum(dim=0)
  expansion_offset = torch.repeat_interleave(expansion_offset, counts)
  expansion_offset = torch.repeat_interleave(expansion_offset, expansion)
  off_start = torch.repeat_interleave(torch.repeat_interleave(counts, counts) - expansion, expansion)
  access = torch.arange(expansion.sum(), device=data.device)
  access = access - torch.repeat_interleave(expansion.roll(1).cumsum(dim=0), expansion) + off_start + expansion_offset

  result = op(expanded, data[access.to(data.device)])
  return result, torch.repeat_interleave(indices, expansion, dim=0) 
Example #10
Source File: viterbi.py    From didyprog with MIT License
def max(X):
        seq_len, n_batch, n_states = X.shape
        X_sorted, _ = torch.sort(X, dim=2, descending=True)
        cssv = torch.cumsum(X_sorted, dim=2) - 1
        ind = torch.arange(1, n_states + 1, dtype=X.dtype, device=X.device)
        cond = X_sorted - cssv / ind > 0
        rho = cond.long().sum(dim=2)
        cssv = cssv.view(-1, n_states)
        rho = rho.view(-1)

        tau = (torch.gather(cssv, dim=1, index=rho[:, None] - 1)[:, 0]
               / rho.type(X.type()))
        tau = tau.view(seq_len, n_batch)
        A = torch.clamp(X - tau[:, :, None], min=0)
        # A /= A.sum(dim=2, keepdim=True)

        M = torch.sum(A * (X - .5 * A), dim=2)

        return M.squeeze(), A.squeeze() 
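This is the sparsemax projection onto the probability simplex, computed with a sort and a cumulative sum. A worked single-vector version of the same recipe, without the batch reshaping:

import torch

z = torch.tensor([2.0, 1.0, 0.1])
z_sorted, _ = torch.sort(z, descending=True)
cssv = torch.cumsum(z_sorted, dim=0) - 1
ind = torch.arange(1, z.numel() + 1, dtype=z.dtype)
rho = (z_sorted - cssv / ind > 0).sum()        # support size of the result
tau = cssv[rho - 1] / rho
p = torch.clamp(z - tau, min=0)                # tensor([1., 0., 0.]), sums to 1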
Example #11
Source File: adhoc_metric.py    From pt-ranking.github.io with MIT License
def torch_discounted_cumu_gain_at_ks(sorted_labels, max_cutoff, multi_level_rele=True):
	'''
	ICML-nDCG, which places stronger emphasis on retrieving relevant documents
	:param sorted_labels: ranked labels (ideal or as ranked by a system) as a torch tensor
	:param max_cutoff: the maximum rank position to be considered
	:param multi_level_rele: True for multi-level relevance labels; False for listwise int-value labels, e.g., MQ2007-list
	:return: discounted cumulative gain at each rank position
	'''

	if multi_level_rele:    #the common case with multi-level labels
		nums = torch.pow(2.0, sorted_labels[0:max_cutoff]) - 1.0
	else:
		nums = sorted_labels[0:max_cutoff]  #the case like listwise ltr_adhoc, where the relevance is labeled as (n-rank_position)

	denoms = torch.log2(torch.arange(max_cutoff).type(torch.FloatTensor) + 2.0)   #discounting factor
	discounted_cumu_gains = torch.cumsum(nums/denoms, dim=0)   # discounted cumulative gain w.r.t. each position

	return discounted_cumu_gains
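Because cumsum accumulates position by position, one call produces the DCG at every cutoff simultaneously. A minimal end-to-end check:

import torch

labels = torch.tensor([3., 2., 3., 0., 1.])                  # graded relevance, already ranked
nums = torch.pow(2.0, labels) - 1.0
denoms = torch.log2(torch.arange(5, dtype=torch.float) + 2.0)
print(torch.cumsum(nums / denoms, dim=0))                    # DCG@1 through DCG@5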
Example #12
Source File: metric.py    From pt-ranking.github.io with MIT License
def tor_discounted_cumu_gain_at_ks(sorted_labels, max_cutoff, multi_level_rele=True):
    '''
    ICML-nDCG, which places stronger emphasis on retrieving relevant documents
    :param sorted_labels: ranked labels (ideal or as ranked by a system) as a torch tensor
    :param max_cutoff: the maximum rank position to be considered
    :param multi_level_rele: True for multi-level relevance labels; False for listwise int-value labels, e.g., MQ2007-list
    :return: discounted cumulative gain at each rank position
    '''

    if multi_level_rele:    #the common case with multi-level labels
        nums = torch.pow(2.0, sorted_labels[0:max_cutoff]) - 1.0
    else:
        nums = sorted_labels[0:max_cutoff]  #the case like listwise ranking, where the relevance is labeled as (n-rank_position)

    denoms = torch.log2(torch.arange(max_cutoff, dtype=torch.float) + 2.0)   #discounting factor
    discounted_cumu_gains = torch.cumsum(nums/denoms, dim=0)   # discounted cumulative gain w.r.t. each position

    return discounted_cumu_gains
Example #13
Source File: listmle.py    From pt-ranking.github.io with MIT License
def backward(ctx, grad_output):
		'''
		In the backward pass we receive the context object and a Tensor containing the gradient of the loss
		with respect to the output produced during the forward pass (i.e., forward's output).
		We retrieve the cached tensors from the context object and must compute and return the gradient of the
		loss with respect to forward's input. By the chain rule, that is grad_output multiplied by the gradient
		(grad_out_wrt_in) of forward's output w.r.t. forward's input.
		:param ctx:
		:param grad_output:
		:return:
		'''

		input, fd_output = ctx.saved_tensors
		#chain rule
		bk_output = grad_output * (torch.exp(input) * torch.cumsum(torch.exp(-fd_output), dim=1))

		return bk_output 
Example #14
Source File: listmle.py    From pt-ranking.github.io with MIT License
def forward(ctx, input):
		'''
		In the forward pass we receive a context object and a Tensor containing the input;
		we must return a Tensor containing the output, and we can use the context object to cache objects for use in the backward pass.
		Specifically, ctx is a context object that can be used to stash information for backward computation.
		You can cache arbitrary objects for use in the backward pass using the ctx.save_for_backward method.
		:param ctx:
		:param input: i.e., batch_preds of [batch, ranking_size]; each row holds the relevance predictions for the documents of one query
		:return: [batch, ranking_size], each row represents the log_cumsum_exp value
		'''

		m, _ = torch.max(input, dim=1, keepdim=True)    #a transformation aiming for higher stability when computing softmax() with exp()
		y = input - m
		y = torch.exp(y)
		y_cumsum_t2h = torch.flip(torch.cumsum(torch.flip(y, dims=[1]), dim=1), dims=[1])    #row-wise cumulative sum, from tail to head
		fd_output = torch.log(y_cumsum_t2h) + m # corresponding to the '-m' operation

		ctx.save_for_backward(input, fd_output)

		return fd_output 
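The forward pass is a numerically stable log-cumsum-exp from tail to head: entry i holds the log of the sum of exp over positions i..end, which is the denominator of the Plackett-Luce likelihood at step i. Checking against the direct computation:

import torch

batch_preds = torch.tensor([[2.0, 1.0, 0.5]])
m, _ = torch.max(batch_preds, dim=1, keepdim=True)
y = torch.exp(batch_preds - m)
out = torch.log(torch.flip(torch.cumsum(torch.flip(y, dims=[1]), dim=1), dims=[1])) + m
# out[0, 0] == torch.log(torch.exp(batch_preds[0]).sum()) up to float error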
Example #15
Source File: gaussian_mixture.py    From FrEIA with MIT License
def pick_mixture_component(w, seed=None):
        '''Randomly choose mixture component indices with probability given by
        the component weights w. Works on batches of component weights.

        w:      Weights of the mixture components, must be positive and sum to one
        seed:   Optional RNG seed for consistent decisions'''

        w_thresholds = torch.cumsum(w, dim=1)
        # Prepare local random number generator
        rng = torch.Generator(device=w.device)
        if isinstance(seed, int):
            rng = rng.manual_seed(seed)
        else:
            rng.seed()
        # Draw one uniform random number per batch row and compare against thresholds
        u = torch.rand(w.shape[0], 1, device=w.device, generator=rng)
        indices = torch.sum(u > w_thresholds, dim=1).int()
        # Return mixture component indices
        return indices 
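The cumsum turns the weights into CDF thresholds, so counting how many thresholds a uniform draw exceeds is inverse-CDF sampling. For a single row:

import torch

w = torch.tensor([[0.2, 0.5, 0.3]])
thresholds = torch.cumsum(w, dim=1)       # tensor([[0.2000, 0.7000, 1.0000]])
u = torch.tensor([[0.65]])
idx = torch.sum(u > thresholds, dim=1)    # 0.65 falls in (0.2, 0.7] -> component 1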
Example #16
Source File: runningstats.py    From gandissect with MIT License
def quantiles(self, quantiles, old_style=False):
        if self.size == 0:
            return torch.full((self.depth, len(quantiles)), torch.nan)
        summary, weights = self._weighted_summary()
        cumweights = torch.cumsum(weights, dim=-1) - weights / 2
        if old_style:
            # To be consistent with numpy.percentile
            cumweights -= cumweights[:,0:1].clone()
            cumweights /= cumweights[:,-1:].clone()
        else:
            cumweights /= torch.sum(weights, dim=-1, keepdim=True)
        result = torch.zeros(self.depth, len(quantiles),
                dtype=self.dtype, device=self.device)
        # numpy is needed for interpolation
        if not hasattr(quantiles, 'cpu'):
            quantiles = torch.Tensor(quantiles)
        nq = quantiles.cpu().numpy()
        ncw = cumweights.cpu().numpy()
        nsm = summary.cpu().numpy()
        for d in range(self.depth):
            result[d] = torch.tensor(numpy.interp(nq, ncw[d], nsm[d]),
                    dtype=self.dtype, device=self.device)
        return result 
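The expression cumsum(weights) - weights / 2 places each summary point at the midpoint of its weight mass, which makes the interpolation a weighted quantile estimate. A toy version outside the class, on made-up values and weights:

import torch
import numpy

values = torch.tensor([0., 1., 2.])
weights = torch.tensor([1., 1., 2.])
cumweights = torch.cumsum(weights, dim=-1) - weights / 2   # tensor([0.5000, 1.5000, 3.0000])
cumweights /= torch.sum(weights)                           # normalized positions in [0, 1]
median = numpy.interp(0.5, cumweights.numpy(), values.numpy())   # weighted median, about 1.33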
Example #17
Source File: runningstats.py    From gandissect with MIT License
def normalize(self, data):
        '''
        Given input data as taken from the training distribution,
        normalizes every channel to reflect quantile values,
        uniformly distributed, within [0, 1].
        '''
        assert self.size > 0
        assert data.shape[0] == self.depth
        summary, weights = self._weighted_summary()
        cumweights = torch.cumsum(weights, dim=-1) - weights / 2
        cumweights /= torch.sum(weights, dim=-1, keepdim=True)
        result = torch.zeros_like(data).float()
        # numpy is needed for interpolation
        ndata = data.cpu().numpy().reshape((data.shape[0], -1))
        ncw = cumweights.cpu().numpy()
        nsm = summary.cpu().numpy()
        for d in range(self.depth):
            normed = torch.tensor(numpy.interp(ndata[d], nsm[d], ncw[d]),
                dtype=torch.float, device=data.device).clamp_(0.0, 1.0)
            if len(data.shape) > 1:
                normed = normed.view(*(data.shape[1:]))
            result[d] = normed
        return result 
Example #18
Source File: tiling.py    From heat with MIT License
def __adjust_last_row_sp0_m_ge_n(
        arr, lshape_map, last_diag_pr, row_inds, row_per_proc_list, tile_columns
    ):
        """

        Need to adjust the size of last row if arr.split == 0 and the diagonal ends before the
        last tile. This should only be run if arr,split == 0 and last_diag_pr < arr.comm.size - 1.
        """
        # need to find the amount of data after the diagonal
        lshape_cumsum = torch.cumsum(lshape_map[..., 0], dim=0)
        diff = lshape_cumsum[last_diag_pr] - arr.gshape[1]
        if diff > torch.true_divide(lshape_map[last_diag_pr, 0], 2):  # todo: tune this?
            # if the shape diff is > half the data on the process
            #   then add a row after the diagonal, todo: is multiple rows faster?
            row_inds.insert(tile_columns, diff)
            row_per_proc_list[last_diag_pr] += 1
        else:
            # if the diff is < half the data on the process
            #   then extend the last row inds to be the end of the process
            row_inds[tile_columns - 1] += diff 
Example #19
Source File: entity_ranking.py    From kge with MIT License
def _compute_metrics(self, rank_hist, suffix=""):
        """Computes desired matrix from rank histogram"""
        metrics = {}
        n = torch.sum(rank_hist).item()

        ranks = torch.arange(1, self.dataset.num_entities() + 1).float().to(self.device)
        metrics["mean_rank" + suffix] = (
            (torch.sum(rank_hist * ranks).item() / n) if n > 0.0 else 0.0
        )

        reciprocal_ranks = 1.0 / ranks
        metrics["mean_reciprocal_rank" + suffix] = (
            (torch.sum(rank_hist * reciprocal_ranks).item() / n) if n > 0.0 else 0.0
        )

        hits_at_k = (
            (torch.cumsum(rank_hist[: max(self.hits_at_k_s)], dim=0) / n).tolist()
            if n > 0.0
            else [0.0] * max(self.hits_at_k_s)
        )

        for i, k in enumerate(self.hits_at_k_s):
            metrics["hits_at_{}{}".format(k, suffix)] = hits_at_k[k - 1]

        return metrics 
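Since hits@k counts all ranks up to k, a cumsum over the rank histogram yields every hits@k in one pass. For instance:

import torch

rank_hist = torch.tensor([5., 3., 2., 0., 1.])   # how many queries ranked the answer at positions 1..5
n = torch.sum(rank_hist)
print(torch.cumsum(rank_hist, dim=0) / n)        # hits@1..hits@5: about 0.45, 0.73, 0.91, 0.91, 1.00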
Example #20
Source File: predictor.py    From pytorch-asr with GNU General Public License v3.0
def decode(self, data_loader):
        self.model.eval()
        with torch.no_grad():
            for i, (data) in enumerate(data_loader):
                # predict phones using AM
                xs, frame_lens, filenames = data
                if self.use_cuda:
                    xs = xs.cuda(non_blocking=True)
                ys_hat = self.model(xs)
                ys_hat = ys_hat.unsqueeze(dim=0).transpose(1, 2)
                pos = torch.cat((torch.zeros((1, ), dtype=torch.long), torch.cumsum(frame_lens, dim=0)))
                ys_hats = [ys_hat.narrow(2, p, l).clone() for p, l in zip(pos[:-1], frame_lens)]
                max_len = torch.max(frame_lens)
                ys_hats = [nn.ConstantPad1d((0, max_len-yh.size(2)), 0)(yh) for yh in ys_hats]
                ys_hat = torch.cat(ys_hats).transpose(1, 2)
                # latgen decoding
                if self.use_cuda:
                    ys_hat = ys_hat.cpu()
                words, alignment, w_sizes, a_sizes = self.decoder(ys_hat, frame_lens)
                # print results
                ys_hat = [y[:s] for y, s in zip(ys_hat, frame_lens)]
                words = [w[:s] for w, s in zip(words, w_sizes)]
                for results in zip(filenames, ys_hat, words):
                    self.print_result(*results) 
Example #21
Source File: oft.py    From oft with MIT License
def integral_image(features):
    return torch.cumsum(torch.cumsum(features, dim=-1), dim=-2) 
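Given the integral image, the sum over any axis-aligned box follows from four lookups (the standard inclusion-exclusion identity). A quick check using the function above:

import torch

feats = torch.arange(16.).reshape(1, 4, 4)
ii = integral_image(feats)
box_sum = ii[:, 2, 2] - ii[:, 0, 2] - ii[:, 2, 0] + ii[:, 0, 0]
assert box_sum.item() == feats[:, 1:3, 1:3].sum().item()   # both are 30.0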
Example #22
Source File: generate.py    From GPT2-Chinese with MIT License
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocabulary size)
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    assert logits.dim() == 1  # batch size 1 for now - could be updated for more but the code would be less clear
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value

    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits 
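A typical sampling loop applies the filter to the last-step logits and then draws from the renormalized distribution. A minimal usage sketch of the function above (note the clone: the filter modifies logits in place):

import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.5, 0.3, -1.0, -2.0])
filtered = top_k_top_p_filtering(logits.clone(), top_k=3, top_p=0.9)
probs = F.softmax(filtered, dim=-1)                   # filtered-out entries get probability 0
next_token = torch.multinomial(probs, num_samples=1)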
Example #23
Source File: ptBEV.py    From PolarSeg with BSD 3-Clause "New" or "Revised" License
def grp_range_torch(a, dev):
    # for group sizes a = [n0, n1, ...], return the flat tensor [0..n0-1, 0..n1-1, ...]
    idx = torch.cumsum(a, 0)
    id_arr = torch.ones(idx[-1], dtype=torch.int64, device=dev)
    id_arr[0] = 0
    id_arr[idx[:-1]] = -a[:-1] + 1
    return torch.cumsum(id_arr, 0)
Example #24
Source File: generate_texts.py    From GPT2-Chinese with MIT License
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocabulary size)
            top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    assert logits.dim() == 1  # batch size 1 for now - could be updated for more but the code would be less clear
    top_k = min(top_k, logits.size(-1))  # Safety check
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value

    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        indices_to_remove = sorted_indices[sorted_indices_to_remove]
        logits[indices_to_remove] = filter_value
    return logits 
Example #25
Source File: modeling_utils.py    From exbert with Apache License 2.0
def create_position_ids_from_input_ids(input_ids, padding_idx):
    """ Replace non-padding symbols with their position numbers. Position numbers begin at
    padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
    `utils.make_positions`.

    :param torch.Tensor input_ids:
    :return torch.Tensor:
    """
    # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA.
    mask = input_ids.ne(padding_idx).int()
    incremental_indices = torch.cumsum(mask, dim=1).type_as(mask) * mask
    return incremental_indices.long() + padding_idx
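The mask-then-cumsum trick counts only non-padding tokens, so padding positions keep padding_idx while real tokens count up from padding_idx + 1. For example:

import torch

input_ids = torch.tensor([[5, 7, 9, 1, 1]])    # 1 is the padding index
print(create_position_ids_from_input_ids(input_ids, padding_idx=1))
# tensor([[2, 3, 4, 1, 1]])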
Example #26
Source File: modeling_utils.py    From exbert with Apache License 2.0
def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float("Inf"), min_tokens_to_keep=1):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (batch size, vocabulary size)
            if top_k > 0: keep only top k tokens with highest probability (top-k filtering).
            if top_p < 1.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
            Make sure we keep at least min_tokens_to_keep per batch example in the output
        From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    if top_k > 0:
        top_k = min(max(top_k, min_tokens_to_keep), logits.size(-1))  # Safety check
        # Remove all tokens with a probability less than the last token of the top-k
        indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
        logits[indices_to_remove] = filter_value

    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold (token with 0 are kept)
        sorted_indices_to_remove = cumulative_probs > top_p
        if min_tokens_to_keep > 1:
            # Keep at least min_tokens_to_keep (set to min_tokens_to_keep-1 because we add the first one below)
            sorted_indices_to_remove[..., :min_tokens_to_keep] = 0
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        # scatter sorted tensors to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits 
Example #27
Source File: tiling.py    From heat with MIT License
def __adjust_cols_sp1_m_ls_n(arr, col_per_proc_list, last_diag_pr, col_inds, lshape_map):
        """

        Add more columns after the diagonal ends if m < n and arr.split == 1
        """
        # need to add to col inds with the rest of the columns
        tile_columns = sum(col_per_proc_list)
        r = last_diag_pr + 1
        for i in range(len(col_inds), tile_columns):
            col_inds.append(lshape_map[r, 1])
            r += 1
        # if the 1st dim is > 0th dim then in split=1 the cols need to be extended
        col_proc_ind = torch.cumsum(
            torch.tensor(col_per_proc_list, device=arr._DNDarray__array.device), dim=0
        )
        for pr in range(arr.comm.size):
            lshape_cumsum = torch.cumsum(lshape_map[..., 1], dim=0)
            col_cumsum = torch.cumsum(
                torch.tensor(col_inds, device=arr._DNDarray__array.device), dim=0
            )
            diff = lshape_cumsum[pr] - col_cumsum[col_proc_ind[pr] - 1]
            if diff > 0 and pr <= last_diag_pr:
                col_per_proc_list[pr] += 1
                col_inds.insert(col_proc_ind[pr], diff)
            if pr > last_diag_pr and diff > 0:
                col_inds.insert(col_proc_ind[pr], diff) 
Example #28
Source File: pt_extensions.py    From pt-ranking.github.io with MIT License
def pl_normalize(batch_scores=None):
    '''
    Normalization based on the 'Plackett_Luce' model
    :param batch_scores: [batch, ranking_size]
    :return: the i-th entry represents the probability of being ranked at the i-th position
    '''
    m, _ = torch.max(batch_scores, dim=1, keepdim=True)  # for higher stability
    y = batch_scores - m
    y = torch.exp(y)
    y_cumsum_t2h = flip(torch.cumsum(flip(y, dim=1), dim=1), dim=1)  # row-wise cumulative sum, from tail to head
    batch_pros = torch.div(y, y_cumsum_t2h)

    return batch_pros 
Example #29
Source File: metric.py    From pt-ranking.github.io with MIT License
def tor_err_at_ks(sys_sorted_labels, ks=None, multi_level_rele=True, max_rele_level=None):
    '''
    :param sys_sorted_labels: the standard labels sorted in descending order according to predicted relevance scores
    :param ks:
    :param multi_level_rele:
    :param max_rele_level:
    :return:
    '''
    valid_max = sys_sorted_labels.size(0)
    used_ks = [k for k in ks if k <= valid_max] if valid_max < max(ks) else ks

    max_cutoff = max(used_ks)
    inds = torch.from_numpy(np.asarray(used_ks) - 1)
    if multi_level_rele:
        positions = torch.arange(max_cutoff) + 1.0
        expt_ranks = 1.0 / positions    # reciprocal ranks, i.e., the utility of stopping at each position

        tor_max_rele = torch.Tensor([max_rele_level]).float()
        satis_pros = (torch.pow(2.0, sys_sorted_labels[0:max_cutoff]) - 1.0)/torch.pow(2.0, tor_max_rele)
        non_satis_pros = torch.ones(max_cutoff) - satis_pros
        cum_non_satis_pros = torch.cumprod(non_satis_pros, dim=0)

        cascad_non_satis_pros = positions.clone()    # P(reaching position r): 1 at r = 1, then cumulative non-satisfaction
        cascad_non_satis_pros[1:max_cutoff] = cum_non_satis_pros[0:max_cutoff-1]
        expt_satis_ranks = expt_ranks * satis_pros * cascad_non_satis_pros  # w.r.t. all rank positions
        err_at_ranks = torch.cumsum(expt_satis_ranks, dim=0)

        err_at_ks = err_at_ranks[inds]
        if valid_max < max(ks):
            padded_err_at_ks = torch.zeros(len(ks))
            padded_err_at_ks[0:len(used_ks)] = err_at_ks
            return padded_err_at_ks
        else:
            return err_at_ks
    else:
        raise NotImplementedError 
Example #30
Source File: sampling_utils.py    From pt-ranking.github.io with MIT License
def uniform_rand_per_label(uni_cnts):
    """ can be compatible with batch """
    num_unis = uni_cnts.size(0)  # number of unique elements
    inner_rand_inds = (torch.rand(num_unis) * uni_cnts.type(tensor)).type(
        torch.LongTensor)  # random index w.r.t each interval
    begs = torch.cumsum(torch.cat([tensor([0.]).type(torch.LongTensor), uni_cnts[0:num_unis - 1]]),
                        dim=0)  # begin positions of each interval within the same vector
    # print('begin positions', begs)
    rand_inds_per_label = begs + inner_rand_inds
    # print('random index', rand_inds_per_label)  # random index tensor([ 0,  1,  3,  6, 10]) ([0, 2, 3, 5, 8])

    return rand_inds_per_label
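Stripped of the module-level tensor alias, the idea is: a cumsum over the counts (shifted by one) gives the start offset of every label's interval, and adding a random in-interval offset picks one element per label. A self-contained sketch:

import torch

uni_cnts = torch.tensor([2, 3, 4])                                         # interval sizes per label
begs = torch.cumsum(torch.cat([torch.tensor([0]), uni_cnts[:-1]]), dim=0)  # tensor([0, 2, 5])
inner = (torch.rand(uni_cnts.size(0)) * uni_cnts).long()                   # random offset inside each interval
rand_inds_per_label = begs + inner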