Python numpy.searchsorted() Examples

The following are 30 code examples of numpy.searchsorted(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
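As a quick refresher before the examples: np.searchsorted(a, v) returns the index (or indices) at which v would have to be inserted into the sorted array a to keep it sorted, with side controlling how ties are broken. A minimal illustration:

import numpy as np

a = np.array([1, 3, 3, 7])
print(np.searchsorted(a, 3))                # 1 -- side='left' inserts before equal values
print(np.searchsorted(a, 3, side='right'))  # 3 -- side='right' inserts after equal values
print(np.searchsorted(a, [0, 4, 9]))        # [0 3 4] -- vectorized over queries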
Example #1
Source File: sparse.py    From recruit with Apache License 2.0
def _first_fill_value_loc(self):
        """
        Get the location of the first missing value.

        Returns
        -------
        int
        """
        if len(self) == 0 or self.sp_index.npoints == len(self):
            return -1

        indices = self.sp_index.to_int_index().indices
        if not len(indices) or indices[0] > 0:
            return 0

        diff = indices[1:] - indices[:-1]
        return np.searchsorted(diff, 2) + 1 
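The trick here: indices holds the positions of the stored (non-fill) values, so consecutive differences are 1 everywhere until the first gap, where the difference jumps to 2 or more. Searching the diff array for 2 lands on that first jump, and adding 1 converts it back to the position of the first missing value. A small sanity check of the idea:

import numpy as np

indices = np.array([0, 1, 2, 5])     # stored positions; position 3 is the first hole
diff = indices[1:] - indices[:-1]    # [1, 1, 3]
print(np.searchsorted(diff, 2) + 1)  # 3

Strictly speaking, searchsorted assumes its input is sorted, which diff need not be; it works when the leading run of 1s is followed by the gap of interest, and an order-independent alternative is np.argmax(diff > 1) + 1.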
Example #2
Source File: Collection.py    From fullrmc with GNU Affero General Public License v3.0
def get_real_index(self, relativeIndex):
        """
        Compute real index of the given relativeIndex considering
        already collected indexes.

        :Parameters:
            #. relativeIndex (int): Atom relative index to already collected
               indexes.

        :Returns:
            #. index (int): Atom real index.
        """
        ### THIS IS NOT TESTED YET.
        indexes = np.array( sorted(self.indexes) )
        shift   = np.searchsorted(a=indexes, v=relativeIndex, side='left')
        index   = relativeIndex+shift
        for idx in indexes[shift:]:
            if idx > index:
                break
            index += 1
        return index 
Example #3
Source File: utils.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def sample_categorical(prob, rng):
    """Sample from independent categorical distributions

    Each batch is an independent categorical distribution.

    Parameters
    ----------
    prob : numpy.ndarray
      Probability of the categorical distribution. Shape --> (batch_num, category_num)
    rng : numpy.random.RandomState

    Returns
    -------
    ret : numpy.ndarray
      Sampling result. Shape --> (batch_num,)
    """
    ret = numpy.empty(prob.shape[0], dtype=numpy.float32)
    for ind in range(prob.shape[0]):
        # Inverse-CDF sampling: find where the uniform draw falls in the
        # row's CDF; clip guards against floating-point rounding pushing
        # the index past the last category.
        category = numpy.searchsorted(numpy.cumsum(prob[ind]), rng.rand())
        ret[ind] = category.clip(min=0.0, max=prob.shape[1] - 0.5)
    return ret 
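This is the classic inverse-CDF trick for categorical sampling: np.cumsum turns a probability row into a CDF, and searchsorted finds which category's interval the uniform draw lands in. A minimal standalone sketch, assuming each row sums to 1:

import numpy

rng = numpy.random.RandomState(0)
prob = numpy.array([[0.2, 0.5, 0.3],
                    [0.9, 0.05, 0.05]])
samples = numpy.array([numpy.searchsorted(numpy.cumsum(p), rng.rand())
                       for p in prob])
print(samples)  # [1 0] with this seed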
Example #4
Source File: pregenerate_training_data.py    From tpu_pretrain with Apache License 2.0
def sample_doc(self, current_idx, sentence_weighted=True):
        # Uses the current iteration counter to ensure we don't sample the same doc twice
        if sentence_weighted:
            # With sentence weighting, we sample docs proportionally to their sentence length
            if self.doc_cumsum is None or len(self.doc_cumsum) != len(self.doc_lengths):
                self._precalculate_doc_weights()
            rand_start = self.doc_cumsum[current_idx]
            rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
            sentence_index = randrange(rand_start, rand_end) % self.cumsum_max
            sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
        else:
            # If we don't use sentence weighting, then every doc has an equal chance to be chosen
            sampled_doc_index = (current_idx + randrange(1, len(self.doc_lengths))) % len(self.doc_lengths)
        assert sampled_doc_index != current_idx
        if self.reduce_memory:
            return self.document_shelf[str(sampled_doc_index)]
        else:
            return self.documents[sampled_doc_index] 
Example #5
Source File: MoveGenerator.py    From fullrmc with GNU Affero General Public License v3.0
def move(self, coordinates):
        """
        Move coordinates.

        :Parameters:
            #. coordinates (np.ndarray): The coordinates on which to apply
               the transformation.

        :Returns:
            #. coordinates (np.ndarray): The new coordinates after applying
               the transformation.
        """
        if self.__randomize:
            index = INT_TYPE( np.searchsorted(self.__selectionScheme, generate_random_float()) )
            moveGenerator = self.__collection[ index ]
        else:
            moveGenerator = self.__collection[self.__step]
            self.__step   = (self.__step+1)%len(self.__collection)
        # perform the move
        return moveGenerator.move(coordinates) 
Example #6
Source File: Collection.py    From fullrmc with GNU Affero General Public License v3.0
def collect(self, index, dataDict, check=True):
        """
        Collect atom given its index.

        :Parameters:
            #. index (int): The atom index to collect.
            #. dataDict (dict): The atom data dict to collect.
            #. check (boolean):  Whether to check dataDict keys before
               collecting. If set to False, user promises that collected
               data is a dictionary and contains the needed keys.
        """
        assert not self.is_collected(index), LOGGER.error("attempting to collect an already collected atom of index '%i'"%index)
        # add data
        if check:
            assert isinstance(dataDict, dict), LOGGER.error("dataDict must be a dictionary of data where keys are dataKeys")
            assert tuple(sorted(dataDict)) == self.__dataKeys, LOGGER.error("dataDict keys don't match promised dataKeys")
        self.__collectedData[index] = dataDict
        # set indexes sorted array
        idx = np.searchsorted(a=self.__indexesSortedArray, v=index, side='left')
        self.__indexesSortedArray = np.insert(self.__indexesSortedArray, idx, index)
        # set state
        self.__state = str(uuid.uuid1()) 
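searchsorted plus np.insert is the standard way to keep a NumPy array sorted under incremental insertions (each insert copies the array, so this costs O(n) per element). A minimal sketch:

import numpy as np

sortedArray = np.array([2, 5, 9])
for value in (7, 1):
    idx = np.searchsorted(a=sortedArray, v=value, side='left')
    sortedArray = np.insert(sortedArray, idx, value)
print(sortedArray)  # [1 2 5 7 9]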
Example #7
Source File: Collection.py    From fullrmc with GNU Affero General Public License v3.0
def release(self, index):
        """
        Release atom from list of collected atoms and return its
        collected data.

        :Parameters:
            #. index (int): The atom index to release.

        :Returns:
            #. dataDict (dict): The released atom collected data.
        """
        if not self.is_collected(index):
            LOGGER.warn("Attempting to release atom %i that is not collected."%index)
            return
        dataDict = self.__collectedData.pop(index)
        # remove index from the sorted indexes array
        idx = np.searchsorted(a=self.__indexesSortedArray, v=index, side='left')
        self.__indexesSortedArray = np.delete(self.__indexesSortedArray, idx)
        # set state
        self.__state = str(uuid.uuid1())
        # return
        return dataDict
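The removal mirrors the insertion in collect() above: searchsorted locates the value's position and np.delete removes it, keeping the array sorted. For example:

import numpy as np

sortedArray = np.array([1, 2, 5, 7, 9])
idx = np.searchsorted(a=sortedArray, v=5, side='left')
print(np.delete(sortedArray, idx))  # [1 2 7 9]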
Example #8
Source File: pfilter.py    From pfilter with MIT License
def residual_resample(weights):
    n = len(weights)
    indices = np.zeros(n, np.uint32)
    # take int(N*w) copies of each weight
    num_copies = (n * weights).astype(np.uint32)
    k = 0
    for i in range(n):
        for _ in range(num_copies[i]):  # make n copies
            indices[k] = i
            k += 1
    # use multinomial resampling on the residual to fill up the rest.
    residual = n * weights - num_copies  # fractional parts, each in [0, 1)
    residual /= np.sum(residual)
    cumsum = np.cumsum(residual)
    cumsum[-1] = 1
    indices[k:n] = np.searchsorted(cumsum, np.random.uniform(0, 1, n - k))
    return indices 
Example #9
Source File: selection.py    From pyshgp with MIT License
def select(self, population: Population, n: int = 1) -> Sequence[Individual]:
        """Return `n` individuals from the population.

        Parameters
        ----------
        population
            A Population of Individuals.
        n : int
            The number of parents to select from the population. Default is 1.

        Returns
        -------
        Sequence[Individual]
            The selected Individuals.

        """
        super().select(population, n)
        population_total_errors = np.array([i.total_error for i in population])
        sum_of_total_errors = np.sum(population_total_errors)
        probabilities = 1.0 - (population_total_errors / sum_of_total_errors)
        probabilities /= np.sum(probabilities)  # normalize so the CDF ends at 1
        selected_ndxs = np.searchsorted(np.cumsum(probabilities), random(n))
        return [population[ndx] for ndx in selected_ndxs] 
Example #10
Source File: carbonara.py    From gnocchi with Apache License 2.0
def __init__(self, ts, granularity, start=None):
        # NOTE(sileht): The whole class assumes ts is ordered and doesn't have
        # duplicate timestamps. It uses numpy.unique, which returns a sorted
        # array, but we always assume the order to be the same as the input.
        self.granularity = granularity
        self.can_derive = isinstance(granularity, numpy.timedelta64)
        self.start = start
        if start is None:
            self._ts = ts
            self._ts_for_derive = ts
        else:
            self._ts = ts[numpy.searchsorted(ts['timestamps'], start):]
            if self.can_derive:
                start_derive = start - granularity
                self._ts_for_derive = ts[
                    numpy.searchsorted(ts['timestamps'], start_derive):
                ]
        if self.can_derive:
            self.indexes = round_timestamp(self._ts['timestamps'], granularity)
        elif calendar.GROUPINGS.get(granularity):
            self.indexes = calendar.GROUPINGS.get(granularity)(
                self._ts['timestamps'])
        self.tstamps, self.counts = numpy.unique(self.indexes,
                                                 return_counts=True) 
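Because the timestamps are sorted, searchsorted gives the position of the first entry at or after start, and a single slice drops everything earlier. A small sketch of that idiom:

import numpy

ts = numpy.array(['2020-01-01', '2020-01-02', '2020-01-03'],
                 dtype='datetime64[D]')
start = numpy.datetime64('2020-01-02')
print(ts[numpy.searchsorted(ts, start):])  # ['2020-01-02' '2020-01-03']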
Example #11
Source File: carbonara.py    From gnocchi with Apache License 2.0
def __getitem__(self, key):
        if isinstance(key, numpy.datetime64):
            idx = numpy.searchsorted(self.timestamps, key)
            if idx < len(self.timestamps) and self.timestamps[idx] == key:
                return self[idx]
            raise KeyError(key)
        if isinstance(key, slice):
            if isinstance(key.start, numpy.datetime64):
                start = numpy.searchsorted(self.timestamps, key.start)
            else:
                start = key.start
            if isinstance(key.stop, numpy.datetime64):
                stop = numpy.searchsorted(self.timestamps, key.stop)
            else:
                stop = key.stop
            key = slice(start, stop, key.step)
        return self.ts[key] 
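The membership test pairs searchsorted with an equality check: the returned position is where the key would be, so the key is present only if the element already there equals it (with a bounds check for keys past the end). In isolation:

import numpy

ts = numpy.array(['2020-01-01', '2020-01-03'], dtype='datetime64[D]')
key = numpy.datetime64('2020-01-02')
idx = numpy.searchsorted(ts, key)
print(idx < len(ts) and ts[idx] == key)  # False -- key is absent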
Example #12
Source File: carbonara.py    From gnocchi with Apache License 2.0
def set_values(self, values, before_truncate_callback=None):
        """Set the timestamps and values in this timeseries.

        :param values: A sorted timeseries array.
        :param before_truncate_callback: A callback function to call before
                                         truncating the BoundTimeSerie to its
                                         maximum size.
        :return: None or the return value of before_truncate_callback
        """
        if self.block_size is not None and len(self.ts) != 0:
            index = numpy.searchsorted(values['timestamps'],
                                       self.first_block_timestamp())
            values = values[index:]
        super(BoundTimeSerie, self).set_values(values)
        if before_truncate_callback:
            return_value = before_truncate_callback(self)
        else:
            return_value = None
        self._truncate()
        return return_value 
Example #13
Source File: inference_utils.py    From ffn with Apache License 2.0
def compute_histogram_lut(image):
  """Computes the inverted CDF of image intensity.

  Args:
    image: 2d numpy array containing the image

  Returns:
    a 256-element numpy array representing a lookup table `lut`,
    such that lut[uniform_image] will transform `uniform_image` with
    a uniform intensity distribution to have an intensity distribution
    matching `image`.
  """
  cdf, bins = skimage.exposure.cumulative_distribution(image)
  lut = np.zeros(256, dtype=np.uint8)
  for i in range(0, 256):
    lut[i] = bins[np.searchsorted(cdf, i / 255.0)]

  return lut 
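Once built, the table is applied by fancy indexing: lut[uniform_image] produces an image of the same shape whose intensities are remapped through the table. A toy sketch with a hand-made LUT (simple inversion) rather than one derived from a real CDF:

import numpy as np

lut = np.arange(256, dtype=np.uint8)[::-1]  # toy LUT: invert intensities
img = np.array([[0, 128], [255, 64]], dtype=np.uint8)
print(lut[img])  # [[255 127] [  0 191]]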
Example #14
Source File: test_bayestar.py    From dustmaps with GNU General Public License v2.0
def _interp_ebv(self, datum, dist):
        """
        Calculate samples of E(B-V) at an arbitrary distance (in kpc) for one
        test coordinate.
        """
        dm = 5. * (np.log10(dist) + 2.)
        idx_ceil = np.searchsorted(datum['DM_bin_edges'], dm)
        if idx_ceil == 0:
            dist_0 = 10.**(datum['DM_bin_edges'][0]/5. - 2.)
            return dist/dist_0 * datum['samples'][:,0]
        elif idx_ceil == len(datum['DM_bin_edges']):
            return datum['samples'][:,-1]
        else:
            dm_ceil = datum['DM_bin_edges'][idx_ceil]
            dm_floor = datum['DM_bin_edges'][idx_ceil-1]
            a = (dm_ceil - dm) / (dm_ceil - dm_floor)
            return (
                (1.-a) * datum['samples'][:,idx_ceil]
                +    a * datum['samples'][:,idx_ceil-1]
            ) 
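searchsorted returns the index of the first bin edge at or above dm (the "ceiling"), so idx_ceil - 1 is the floor edge and a is the linear interpolation weight toward the floor. The bracketing in isolation:

import numpy as np

edges = np.array([4.0, 8.0, 12.0])
dm = 9.0
i = np.searchsorted(edges, dm)               # 2: edges[1] < dm <= edges[2]
a = (edges[i] - dm) / (edges[i] - edges[i - 1])
print(i, a)                                  # 2 0.75 -> 75% weight on the floor edge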
Example #15
Source File: period.py    From recruit with Apache License 2.0
def asof_locs(self, where, mask):
        """
        where : array of timestamps
        mask : array of booleans where data is not NA

        """
        where_idx = where
        if isinstance(where_idx, DatetimeIndex):
            where_idx = PeriodIndex(where_idx.values, freq=self.freq)

        locs = self._ndarray_values[mask].searchsorted(
            where_idx._ndarray_values, side='right')

        locs = np.where(locs > 0, locs - 1, 0)
        result = np.arange(len(self))[mask].take(locs)

        first = mask.argmax()
        result[(locs == 0) & (where_idx._ndarray_values <
                              self._ndarray_values[first])] = -1

        return result 
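The core "as of" idiom: with side='right', searchsorted returns the insertion point after any equal elements, so subtracting 1 yields the position of the last observation at or before each query time (and -1 where none precedes it). In isolation:

import numpy as np

obs_times = np.array([1, 3, 5, 9])
where = np.array([0, 4, 9])
print(np.searchsorted(obs_times, where, side='right') - 1)  # [-1  1  3]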
Example #16
Source File: _pandas_ndarray_store.py    From arctic with GNU Lesser General Public License v2.1
def _index_range(self, version, symbol, date_range=None, **kwargs):
        """ Given a version, read the segment_index and return the chunks associated
        with the date_range. As the segment index is (id -> last datetime)
        we need to take care in choosing the correct chunks. """
        if date_range and 'segment_index' in version:
            # the decompressed index buffer is read-only, but we never write to it
            index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
            dtcol = self._datetime64_index(index)
            if dtcol and len(index):
                dts = index[dtcol]
                start, end = _start_end(date_range, dts)
                if start > dts[-1]:
                    return -1, -1
                idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
                idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)
                return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
        return super(PandasStore, self)._index_range(version, symbol, **kwargs) 
Example #17
Source File: ls_fap.py    From feets with MIT License
def fap_bootstrap(
    Z,
    fmax,
    t,
    y,
    dy,
    normalization="standard",
    n_bootstraps=1000,
    random_seed=None,
):
    rng = np.random.RandomState(random_seed)

    def bootstrapped_power():
        resample = rng.randint(0, len(y), len(y))  # sample with replacement
        ls_boot = LombScargle(t, y[resample], dy[resample])
        freq, power = ls_boot.autopower(
            normalization=normalization, maximum_frequency=fmax
        )
        return power.max()

    pmax = np.array([bootstrapped_power() for i in range(n_bootstraps)])
    pmax.sort()
    return 1 - np.searchsorted(pmax, Z) / len(pmax) 
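The final line computes an empirical false-alarm probability: since pmax is sorted, searchsorted counts how many bootstrap maxima fall below Z, and one minus that fraction is the fraction that reach or exceed it. A tiny numeric check:

import numpy as np

pmax = np.array([0.1, 0.2, 0.4, 0.8])  # already sorted
Z = 0.3
print(1 - np.searchsorted(pmax, Z) / len(pmax))  # 0.5 -- two of four maxima >= Z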
Example #18
Source File: period.py    From recruit with Apache License 2.0
def searchsorted(self, value, side='left', sorter=None):
        if isinstance(value, Period):
            if value.freq != self.freq:
                msg = DIFFERENT_FREQ.format(cls=type(self).__name__,
                                            own_freq=self.freqstr,
                                            other_freq=value.freqstr)
                raise IncompatibleFrequency(msg)
            value = value.ordinal
        elif isinstance(value, compat.string_types):
            try:
                value = Period(value, freq=self.freq).ordinal
            except DateParseError:
                raise KeyError("Cannot interpret '{}' as period".format(value))

        return self._ndarray_values.searchsorted(value, side=side,
                                                 sorter=sorter) 
Example #19
Source File: population.py    From ibllib with MIT License
def _index_of(arr, lookup):
    """Replace scalars in an array by their indices in a lookup table.

    Implicitly assume that:

    * All elements of arr and lookup are non-negative integers.
    * All elements of arr belong to lookup.

    This is not checked for performance reasons.

    """
    # Equivalent of np.digitize(arr, lookup) - 1, but much faster.
    # TODO: assertions to disable in production for performance reasons.
    # TODO: np.searchsorted(lookup, arr) is faster on small arrays with large
    # values
    lookup = np.asarray(lookup, dtype=np.int32)
    m = (lookup.max() if len(lookup) else 0) + 1
    tmp = np.zeros(m + 1, dtype=np.int64)
    # Ensure that -1 values are kept.
    tmp[-1] = -1
    if len(lookup):
        tmp[lookup] = np.arange(len(lookup))
    return tmp[arr] 
Example #20
Source File: graph.py    From jwalk with Apache License 2.0
def encode_edges(edges, nodes):
    """Encode data with dictionary

    Args:
        edges (np.ndarray): np array of the form [node1, node2].
        nodes (np.array): list of unique nodes

    Returns:
        np.ndarray: relabeled edges

    Examples:
        >>> import numpy as np
        >>> edges = np.array([['A', 'B'], ['A', 'C']])
        >>> nodes = np.array(['C', 'B', 'A'])
        >>> print(encode_edges(edges, nodes))
        [[2 1]
         [2 0]]
    """
    sidx = nodes.argsort()
    relabeled_edges = sidx[np.searchsorted(nodes, edges, sorter=sidx)]
    return relabeled_edges 
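The sorter argument is what lets this work on an unsorted nodes array: sidx describes the sorted order, searchsorted returns positions in that sorted view, and indexing sidx with those positions recovers indices into the original nodes. Step by step:

import numpy as np

nodes = np.array(['C', 'B', 'A'])
sidx = nodes.argsort()                                 # [2 1 0]
pos = np.searchsorted(nodes, ['A', 'C'], sorter=sidx)  # [0 2] in sorted order
print(sidx[pos])                                       # [2 0] -- indices into nodes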
Example #21
Source File: dmc.py    From pyqmc with MIT License
def branch(configs, weights):
    """
    Perform branching on a set of walkers by stochastic reconfiguration.

    Walkers are resampled with probability proportional to the weights, and the new weights are all set to be equal to the average weight.
    
    Args:
      configs: (nconfig,nelec,3) walker coordinates

      weights: (nconfig,) walker weights

    Returns:
      configs: resampled walker configurations

      weights: (nconfig,) all weights are equal to average weight
    """
    nconfig = configs.configs.shape[0]
    wtot = np.sum(weights)
    probability = np.cumsum(weights / wtot)
    base = np.random.rand()
    newinds = np.searchsorted(probability, (base + np.arange(nconfig) / nconfig) % 1.0)
    configs.resample(newinds)
    weights.fill(wtot / nconfig)
    return configs, weights 
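The comb (base + arange(nconfig)/nconfig) % 1.0 places one equally spaced point per walker with a single shared random offset (systematic resampling), and searchsorted maps each point to the walker whose cumulative-weight interval contains it. A small numeric sketch:

import numpy as np

weights = np.array([0.1, 0.6, 0.3])
cdf = np.cumsum(weights / weights.sum())   # [0.1, 0.7, 1.0]
points = (0.05 + np.arange(3) / 3) % 1.0   # 0.05 stands in for np.random.rand()
print(np.searchsorted(cdf, points))        # [0 1 2]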
Example #22
Source File: cascade_lifetime.py    From news-popularity-prediction with Apache License 2.0
def get_k_based_on_lifetime(data_frame, lifetime, min_k, max_k):
    lifetime_col = data_frame["timestamp"] - data_frame["timestamp"].iloc[0]
    lifetime_col = lifetime_col.iloc[min_k:]

    index = np.searchsorted(lifetime_col, lifetime)

    index = max(0, index[0]-1)

    k = min_k + index

    if lifetime_col.size > (index+1):
        next_t = lifetime_col.iloc[index+1]
        if k == min_k:
            if lifetime_col.iloc[index] == lifetime_col.iloc[index+1]:
                k += 1
                if lifetime_col.size > (index+2):
                    next_t = lifetime_col.iloc[index+2]
                else:
                    next_t = np.nan
    else:
        next_t = np.nan

    return k, next_t 
Example #23
Source File: carbonara.py    From gnocchi with Apache License 2.0
def truncate(self, oldest_point=None):
        """Truncate the time series up to oldest_point excluded.

        :param oldest_point: Oldest point to keep from, this excluded.
                             Default is the aggregation timespan.
        :type oldest_point: numpy.datetime64 or numpy.timedelta64
        :return: The oldest point that could have been kept.
        """
        last = self.last
        if last is None:
            return
        if oldest_point is None:
            oldest_point = self.aggregation.timespan
            if oldest_point is None:
                return
        if isinstance(oldest_point, numpy.timedelta64):
            oldest_point = last - oldest_point
        index = numpy.searchsorted(self.ts['timestamps'], oldest_point,
                                   side='right')
        self.ts = self.ts[index:]
        return oldest_point 
Example #24
Source File: period.py    From recruit with Apache License 2.0
def _get_string_slice(self, key):
        if not self.is_monotonic:
            raise ValueError('Partial indexing only valid for '
                             'ordered time series')

        key, parsed, reso = parse_time_string(key, self.freq)
        grp = resolution.Resolution.get_freq_group(reso)
        freqn = resolution.get_freq_group(self.freq)
        if reso in ['day', 'hour', 'minute', 'second'] and not grp < freqn:
            raise KeyError(key)

        t1, t2 = self._parsed_string_to_bounds(reso, parsed)
        return slice(self.searchsorted(t1.ordinal, side='left'),
                     self.searchsorted(t2.ordinal, side='right')) 
Example #25
Source File: base.py    From recruit with Apache License 2.0
def _searchsorted_monotonic(self, label, side='left'):
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).
            pos = self[::-1].searchsorted(label, side='right' if side == 'left'
                                          else 'left')
            return len(self) - pos

        raise ValueError('index must be monotonic increasing or decreasing') 
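For the decreasing case, the index is reversed to satisfy searchsorted's ascending-order requirement, the search side is flipped, and len(self) - pos reflects the position back into the original orientation. A quick check of the identity:

import numpy as np

desc = np.array([9, 7, 5, 2])
pos = np.searchsorted(desc[::-1], 5, side='right')  # side flipped from 'left'
print(len(desc) - pos)  # 2 -- where 5 sits in the descending array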
Example #26
Source File: utils.py    From mars with Apache License 2.0
def calc_columns_index(column_name, df):
    """
    Calculate the chunk index on the axis 1 according to the selected column.
    :param column_name: selected column name
    :param df: input tiled DataFrame
    :return: chunk index on the columns axis
    """
    column_nsplits = df.nsplits[1]
    column_loc = df.columns_value.to_pandas().get_loc(column_name)
    return np.searchsorted(np.cumsum(column_nsplits), column_loc + 1) 
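Cumulative chunk sizes turn "which chunk holds column position loc" into a searchsorted query; the + 1 makes a position that lands exactly on a chunk boundary resolve to the chunk that actually contains it. A small sketch:

import numpy as np

nsplits = (3, 4, 2)            # chunk sizes along the columns axis
bounds = np.cumsum(nsplits)    # [3, 7, 9]
for loc in (0, 2, 3, 8):
    print(loc, '->', np.searchsorted(bounds, loc + 1))
# 0 -> 0, 2 -> 0, 3 -> 1, 8 -> 2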
Example #27
Source File: test_datetime_index.py    From recruit with Apache License 2.0
def test_resample_size():
    n = 10000
    dr = date_range('2015-09-19', periods=n, freq='T')
    ts = Series(np.random.randn(n), index=np.random.choice(dr, n))

    left = ts.resample('7T').size()
    ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')

    bins = np.searchsorted(ix.values, ts.index.values, side='right')
    val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',
                                                              copy=False)

    right = Series(val, index=ix)
    assert_series_equal(left, right) 
Example #28
Source File: histograms.py    From recruit with Apache License 2.0
def _search_sorted_inclusive(a, v):
    """
    Like `searchsorted`, but where the last item in `v` is placed on the right.

    In the context of a histogram, this makes the last bin edge inclusive
    """
    return np.concatenate((
        a.searchsorted(v[:-1], 'left'),
        a.searchsorted(v[-1:], 'right')
    )) 
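Compare the two behaviors on data that contains the last edge exactly: with side='left' everywhere, values equal to the final edge fall outside the last bin; switching to side='right' for the final edge only makes it inclusive:

import numpy as np

a = np.array([0.0, 0.5, 1.0, 1.0, 2.0])  # sorted data
edges = np.array([0.0, 1.0, 2.0])
print(a.searchsorted(edges, 'left'))     # [0 2 4] -- the trailing 2.0 is excluded
print(np.concatenate((a.searchsorted(edges[:-1], 'left'),
                      a.searchsorted(edges[-1:], 'right'))))  # [0 2 5] -- inclusive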
Example #29
Source File: sparse.py    From recruit with Apache License 2.0
def searchsorted(self, v, side="left", sorter=None):
        msg = "searchsorted requires high memory usage."
        warnings.warn(msg, PerformanceWarning, stacklevel=2)
        v = np.asarray(v)
        return np.asarray(self, dtype=self.dtype.subtype).searchsorted(
            v, side, sorter
        ) 
Example #30
Source File: test_datetime_index.py    From recruit with Apache License 2.0
def test_resample_group_info(n, k):
    # GH10914

    # use a fixed seed to always have the same uniques
    prng = np.random.RandomState(1234)

    dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
    ts = Series(prng.randint(0, n // k, n).astype('int64'),
                index=prng.choice(dr, n))

    left = ts.resample('30T').nunique()
    ix = date_range(start=ts.index.min(), end=ts.index.max(),
                    freq='30T')

    vals = ts.values
    bins = np.searchsorted(ix.values, ts.index, side='right')

    sorter = np.lexsort((vals, bins))
    vals, bins = vals[sorter], bins[sorter]

    mask = np.r_[True, vals[1:] != vals[:-1]]
    mask |= np.r_[True, bins[1:] != bins[:-1]]

    arr = np.bincount(bins[mask] - 1,
                      minlength=len(ix)).astype('int64', copy=False)
    right = Series(arr, index=ix)

    assert_series_equal(left, right)