Python numpy.lexsort() Examples

The following are 30 code examples for showing how to use numpy.lexsort(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module numpy, or try the search function.

Example 1
Project: pymoo   Author: msu-coinlab   File: misc.py    License: Apache License 2.0 6 votes vote down vote up
def get_duplicates(M):
    """Group the row indices of ``M`` that hold identical rows.

    The rows are sorted lexicographically (first column most significant)
    so that equal rows become neighbours, and every run of two or more
    equal rows is reported as one group.

    Parameters
    ----------
    M : 2-D numpy array

    Returns
    -------
    list of lists
        One inner list per group of identical rows, containing the
        original row indices of that group. Rows that occur only once
        are not reported.
    """
    res = []
    # lexsort treats its LAST key as the primary one, hence the reversal.
    I = np.lexsort([M[:, i] for i in reversed(range(0, M.shape[1]))])
    S = M[I, :]
    last = S.shape[0] - 1

    i = 0
    while i < last:
        group = []
        # The bounds check keeps ``S[i + 1]`` valid when a run of
        # duplicates extends to the very last sorted row.
        while i < last and np.all(S[i, :] == S[i + 1, :]):
            group.append(I[i])
            i += 1
        if group:
            # ``i`` now points at the final member of the run.
            group.append(I[i])
            res.append(group)
        i += 1

    return res
Example 2
Project: pymoo   Author: msu-coinlab   File: reference_direction.py    License: Apache License 2.0 6 votes vote down vote up
def do(self):
        """Produce the reference directions.

        Seeds NumPy's global random state when ``self.seed`` is set. For
        ``self.n_dim == 1`` the result is the constant ``[[1.0]]``.
        Otherwise the directions come from ``self._do()`` (only the first
        element is used if it returns a tuple), are passed through
        ``scale_reference_directions`` when ``self.scaling`` is set, and
        are sorted lexicographically when ``self.lexsort`` is truthy.
        """
        if self.seed is not None:
            np.random.seed(self.seed)

        # a single dimension has exactly one direction
        if self.n_dim == 1:
            return np.array([[1.0]])

        produced = self._do()
        directions = produced[0] if isinstance(produced, tuple) else produced

        if self.scaling is not None:
            directions = scale_reference_directions(directions, self.scaling)

        # sort the directions lexicographically if requested; lexsort uses
        # its last key as the primary one, so feed the columns back to front
        if self.lexsort:
            n_cols = directions.shape[1]
            keys = [directions[:, col] for col in range(n_cols - 1, -1, -1)]
            directions = directions[np.lexsort(keys)]

        return directions
Example 3
Project: recruit   Author: Frank-qlu   File: test_algos.py    License: Apache License 2.0 6 votes vote down vote up
def test_groupsort_indexer():
    """Compare ``libalgos.groupsort_indexer`` with NumPy's stable sorts."""
    n_groups = 1000
    primary = np.random.randint(0, n_groups, 100).astype(np.int64)
    secondary = np.random.randint(0, n_groups, 100).astype(np.int64)

    # Single key: the reference must be a stable sort. np.argsort returns
    # int while groupsort_indexer always returns int64, hence the cast.
    actual = libalgos.groupsort_indexer(primary, n_groups)[0]
    reference = np.argsort(primary, kind='mergesort').astype(np.int64)
    tm.assert_numpy_array_equal(actual, reference)

    # Two keys folded into one integer: compare with lexsort, which also
    # returns int and therefore needs the same cast.
    combined = primary * n_groups + secondary
    actual = libalgos.groupsort_indexer(combined, n_groups * n_groups)[0]
    reference = np.lexsort((secondary, primary)).astype(np.int64)
    tm.assert_numpy_array_equal(actual, reference)
Example 4
Project: recruit   Author: Frank-qlu   File: multi.py    License: Apache License 2.0 6 votes vote down vote up
def is_monotonic_increasing(self):
        """
        Return whether the index is monotonic increasing, i.e. each value
        is equal to or greater than the one before it.
        """
        # np.lexsort takes the most significant key last, so the level
        # values are collected from the last level down to the first.
        n_levels = len(self.levels)
        keys = [self._get_level_values(pos).values
                for pos in range(n_levels - 1, -1, -1)]
        try:
            return Index(np.lexsort(keys)).is_monotonic
        except TypeError:
            # mixed types: np.lexsort cannot compare them, so fall back
            # to checking the raw values directly
            return Index(self.values).is_monotonic
Example 5
Project: tenpy   Author: tenpy   File: lattice.py    License: GNU General Public License v3.0 6 votes vote down vote up
def order(self, order_):
        """Store a new ordering and rebuild the lookup data derived from it.

        ``order_`` is used as a 2-D integer array: its transpose is fed to
        ``np.lexsort`` and used as an index tuple, and its last column is
        compared against the unit-cell index ``u``.
        """
        # the value itself
        self._order = order_
        index_columns = order_.T

        # cached data depending on it
        self._perm = np.lexsort(index_columns)
        # advanced indexing scatters the running site number to each index
        site_lookup = np.empty(self.shape, np.intp)
        site_lookup[tuple(index_columns)] = np.arange(self.N_sites)
        self._mps2lat_vals_idx = site_lookup

        # the same information restricted to one fixed ``u`` at a time
        sites_per_u = []
        lookup_per_u = []
        for u in range(len(self.unit_cell)):
            selected = np.nonzero(order_[:, -1] == u)[0]
            cell_lookup = np.empty(self.Ls, np.intp)
            cell_lookup[tuple(order_[selected, :-1].T)] = np.arange(self.N_cells)
            sites_per_u.append(selected)
            lookup_per_u.append(cell_lookup)
        self._mps2lat_vals_idx_fix_u = lookup_per_u
        self._mps_fix_u = tuple(sites_per_u)
Example 6
Project: tenpy   Author: tenpy   File: np_conserved.py    License: GNU General Public License v3.0 6 votes vote down vote up
def isort_qdata(self):
        """Sort ``self._qdata`` lexicographically, in place.

        ``self._data`` is permuted with the same permutation and
        ``self._qdata_sorted`` is set to ``True``. Nothing happens if the
        flag is already set.
        """
        if self._qdata_sorted:
            return
        # fewer than two rows are trivially sorted
        if len(self._qdata) >= 2:
            order = np.lexsort(self._qdata.T)
            self._qdata = self._qdata[order, :]
            self._data = [self._data[pos] for pos in order]
        self._qdata_sorted = True

    # reshaping =============================================================== 
Example 7
Project: tenpy   Author: tenpy   File: test_lattice.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_possible_couplings():
    """Check ``possible_couplings`` against ``possible_multi_couplings``.

    Both are evaluated for the same two-operator coupling on a regular
    and an irregular lattice; after lexsorting by lattice index they must
    report the same indices and MPS sites.
    """
    regular = lattice.Honeycomb(2,
                                3, [None, None],
                                order="snake",
                                bc="periodic",
                                bc_MPS="infinite")
    irregular = lattice.IrregularLattice(regular, remove=[[0, 0, 0]])
    u0, u1 = 0, 1
    shifts = [(0, 0), (0, 1), (2, 1), (-1, -1)]
    for lat in (regular, irregular):
        for dx in shifts:
            print("dx =", dx)
            mps0, mps1, lat_indices, coupling_shape = lat.possible_couplings(u0, u1, dx)
            ops = [(None, [0, 0], u0), (None, dx, u1)]
            m_ijkl, m_lat_indices, m_coupling_shape = lat.possible_multi_couplings(ops)
            assert coupling_shape == m_coupling_shape
            if len(lat_indices) == 0:
                # nothing to compare for this shift
                continue
            # bring the pairwise result into lexicographic order
            pair_order = np.lexsort(lat_indices.T)
            mps0 = mps0[pair_order]
            mps1 = mps1[pair_order]
            lat_indices = lat_indices[pair_order, :]
            assert m_ijkl.shape == (len(mps0), 2)
            # and the multi-coupling result likewise
            multi_order = np.lexsort(m_lat_indices.T)
            m_ijkl = m_ijkl[multi_order, :]
            m_lat_indices = m_lat_indices[multi_order, :]
            npt.assert_equal(m_lat_indices, lat_indices)
            npt.assert_equal(mps0, m_ijkl[:, 0])
            npt.assert_equal(mps1, m_ijkl[:, 1])
Example 8
Project: lambda-packs   Author: ryfeus   File: coo.py    License: MIT License 6 votes vote down vote up
def _sum_duplicates(self, row, col, data):
        # Assumes (data, row, col) not in canonical format.
        if len(data) == 0:
            return row, col, data
        order = np.lexsort((row, col))
        row = row[order]
        col = col[order]
        data = data[order]
        unique_mask = ((row[1:] != row[:-1]) |
                       (col[1:] != col[:-1]))
        unique_mask = np.append(True, unique_mask)
        row = row[unique_mask]
        col = col[unique_mask]
        unique_inds, = np.nonzero(unique_mask)
        data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
        return row, col, data 
Example 9
Project: vnpy_crypto   Author: birforce   File: test_resample.py    License: MIT License 6 votes vote down vote up
def test_resample_group_info(self):  # GH10914
        """Check ``resample('30T').nunique()`` against a manual count.

        The reference counts distinct (bin, value) pairs per 30-minute bin
        using ``searchsorted``, ``lexsort`` and ``bincount``.
        """
        for n, k in product((10000, 100000), (10, 100, 1000)):
            stamps = date_range(start='2015-08-27', periods=n // 10, freq='T')
            ts = Series(np.random.randint(0, n // k, n).astype('int64'),
                        index=np.random.choice(stamps, n))

            left = ts.resample('30T').nunique()
            ix = date_range(start=ts.index.min(), end=ts.index.max(),
                            freq='30T')

            # assign every observation to its bin, then order by bin and
            # by value within each bin
            vals = ts.values
            bins = np.searchsorted(ix.values, ts.index, side='right')
            by_bin_then_val = np.lexsort((vals, bins))
            vals = vals[by_bin_then_val]
            bins = bins[by_bin_then_val]

            # an entry is counted when its value or its bin differs from
            # the previous entry; the first entry is always counted
            new_value = np.r_[True, vals[1:] != vals[:-1]]
            new_bin = np.r_[True, bins[1:] != bins[:-1]]
            counted = new_value | new_bin

            counts = np.bincount(bins[counted] - 1, minlength=len(ix))
            right = Series(counts.astype('int64', copy=False), index=ix)

            assert_series_equal(left, right)
Example 10
Project: vnpy_crypto   Author: birforce   File: test_algos.py    License: MIT License 6 votes vote down vote up
def test_groupsort_indexer():
    """Compare ``libalgos.groupsort_indexer`` with NumPy's stable sorts."""
    n_groups = 1000
    primary = np.random.randint(0, n_groups, 100).astype(np.int64)
    secondary = np.random.randint(0, n_groups, 100).astype(np.int64)

    # Single key: the reference must be a stable sort. np.argsort returns
    # int while groupsort_indexer always returns int64, hence the cast.
    actual = libalgos.groupsort_indexer(primary, n_groups)[0]
    reference = np.argsort(primary, kind='mergesort').astype(np.int64)
    tm.assert_numpy_array_equal(actual, reference)

    # Two keys folded into one integer: compare with lexsort, which also
    # returns int and therefore needs the same cast.
    combined = primary * n_groups + secondary
    actual = libalgos.groupsort_indexer(combined, n_groups * n_groups)[0]
    reference = np.lexsort((secondary, primary)).astype(np.int64)
    tm.assert_numpy_array_equal(actual, reference)
Example 11
Project: vnpy_crypto   Author: birforce   File: multi.py    License: MIT License 6 votes vote down vote up
def is_monotonic_increasing(self):
        """
        Return whether the index is monotonic increasing, i.e. each value
        is equal to or greater than the one before it.
        """
        # np.lexsort takes the most significant key last, so the level
        # values are collected from the last level down to the first.
        n_levels = len(self.levels)
        keys = [self._get_level_values(pos).values
                for pos in range(n_levels - 1, -1, -1)]
        try:
            return Index(np.lexsort(keys)).is_monotonic
        except TypeError:
            # mixed types: np.lexsort cannot compare them, so fall back
            # to checking the raw values directly
            return Index(self.values).is_monotonic
Example 12
Project: tensorflow-DeepFM   Author: ChenglongChen   File: metrics.py    License: MIT License 5 votes vote down vote up
def gini(actual, pred):
    """Compute the (un-normalised) Gini score of ``pred`` against ``actual``.

    Samples are ordered by descending ``pred``; ties keep their original
    order. The score is the sum of the cumulative ``actual`` values divided
    by their total, minus ``(n + 1) / 2``, all divided by ``n``.

    Parameters
    ----------
    actual, pred : sequences of equal length ``n``

    Returns
    -------
    float
    """
    assert (len(actual) == len(pred))
    # Builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24; both denote the same 64-bit float dtype.
    table = np.asarray(np.c_[actual, pred, np.arange(len(actual))], dtype=float)
    # primary key: descending prediction; secondary key: original position
    table = table[np.lexsort((table[:, 2], -1 * table[:, 1]))]
    total_losses = table[:, 0].sum()
    gini_sum = table[:, 0].cumsum().sum() / total_losses

    gini_sum -= (len(actual) + 1) / 2.
    return gini_sum / len(actual)
Example 13
Project: models   Author: kipoi   File: gtf_utils.py    License: MIT License 5 votes vote down vote up
def get_all_exons(self):
        """Return the distinct exon rows of all transcripts in ``self.trans``.

        Rows are ordered by first column, then second column; the order is
        reversed when ``self.strand`` is ``'-'``.
        """
        stacked = np.vstack([tx.exons for tx in self.trans])
        distinct = np.unique(stacked, axis=0)
        order = np.lexsort((distinct[:, 1], distinct[:, 0]))
        if self.strand == '-':
            order = order[::-1]
        return distinct[order]
Example 14
Project: models   Author: kipoi   File: gtf_utils.py    License: MIT License 5 votes vote down vote up
def get_all_introns(self):
        """Return the distinct intron rows of all transcripts in ``self.trans``.

        ``add_introns()`` is called on every transcript first. Rows are
        ordered by first column, then second column; the order is reversed
        when ``self.strand`` is ``'-'``.
        """
        for tx in self.trans:
            tx.add_introns()
        stacked = np.vstack([tx.introns for tx in self.trans])
        distinct = np.unique(stacked, axis=0)
        order = np.lexsort((distinct[:, 1], distinct[:, 0]))
        if self.strand == '-':
            order = order[::-1]
        return distinct[order]
Example 15
Project: models   Author: kipoi   File: gtf_utils.py    License: MIT License 5 votes vote down vote up
def get_all_exons(self):
        """Return the distinct exon rows of all transcripts in ``self.trans``.

        Rows are ordered by first column, then second column; the order is
        reversed when ``self.strand`` is ``'-'``.
        """
        stacked = np.vstack([tx.exons for tx in self.trans])
        distinct = np.unique(stacked, axis=0)
        order = np.lexsort((distinct[:, 1], distinct[:, 0]))
        if self.strand == '-':
            order = order[::-1]
        return distinct[order]
Example 16
Project: models   Author: kipoi   File: gtf_utils.py    License: MIT License 5 votes vote down vote up
def get_all_introns(self):
        """Return the distinct intron rows of all transcripts in ``self.trans``.

        ``add_introns()`` is called on every transcript first. Rows are
        ordered by first column, then second column; the order is reversed
        when ``self.strand`` is ``'-'``.
        """
        for tx in self.trans:
            tx.add_introns()
        stacked = np.vstack([tx.introns for tx in self.trans])
        distinct = np.unique(stacked, axis=0)
        order = np.lexsort((distinct[:, 1], distinct[:, 0]))
        if self.strand == '-':
            order = order[::-1]
        return distinct[order]
Example 17
Project: models   Author: kipoi   File: dataloader.py    License: MIT License 5 votes vote down vote up
def _get_spliceSites(self, gene):
        ''' Build the splice sites for every transcript of a single gene.
        Applied for normal gtf annotation.

        Transcripts with fewer than two exons contribute nothing. For the
        others one ``SpliceSite`` is created per exon except the last,
        spanning a window of ``self.overhang`` around the exon boundary.
        Returns the list of all ``SpliceSite`` objects.
        '''
        sites = []
        for tx in gene.trans:
            exon_table = tx.exons
            order = np.lexsort((exon_table[:, 1], exon_table[:, 0]))
            if len(exon_table) <= 1:
                continue

            if gene.strand == "+":
                # NOTE(review): exons are used in their stored order here;
                # ``order`` is only applied on the other strand.
                anchors = exon_table[:-1, 1]
                offsets = np.array([-self.overhang + 1, self.overhang])
            else:
                # descending order, then anchor on the first column
                exon_table = exon_table[order[::-1]]
                anchors = exon_table[:-1, 0]
                offsets = np.array([-self.overhang, self.overhang - 1])
            windows = anchors.reshape(-1, 1) + offsets

            for rank in range(windows.shape[0]):
                # the sequence itself can be fetched later in the iterator
                # (spliceSite.get_seq(self.fasta)) to save memory
                sites.append(SpliceSite(gene.chrom,
                                        windows[rank, 0],
                                        windows[rank, 1],
                                        gene.strand,
                                        tx.tranID,
                                        gene.geneID,
                                        gene.biotype,
                                        rank))
        return sites
Example 18
Project: models   Author: kipoi   File: gtf_utils.py    License: MIT License 5 votes vote down vote up
def get_all_introns(self):
        """Return the distinct intron rows of all transcripts in ``self.trans``.

        ``add_introns()`` is called on every transcript first. Rows are
        ordered by first column, then second column; the order is reversed
        when ``self.strand`` is ``'-'``.
        """
        for tx in self.trans:
            tx.add_introns()
        stacked = np.vstack([tx.introns for tx in self.trans])
        distinct = np.unique(stacked, axis=0)
        order = np.lexsort((distinct[:, 1], distinct[:, 0]))
        if self.strand == '-':
            order = order[::-1]
        return distinct[order]
Example 19
Project: models   Author: kipoi   File: dataloader.py    License: MIT License 5 votes vote down vote up
def _get_spliceSites(self, gene):
        ''' Build the splice sites for every transcript of a single gene.
        Applied for normal gtf annotation.

        Transcripts with fewer than two exons contribute nothing. Which
        exon boundary is used depends on ``self.side`` ("5prime" or
        anything else) and on ``gene.strand``; the window around it is
        set by ``self.overhang_l`` and ``self.overhang_r``.
        Returns the list of all ``SpliceSite`` objects.
        '''
        sites = []
        for tx in gene.trans:
            exon_table = tx.exons
            order = np.lexsort((exon_table[:, 1], exon_table[:, 0]))
            if len(exon_table) <= 1:
                continue

            five_prime = self.side == "5prime"
            forward = gene.strand == "+"
            if not forward:
                # other strand: walk the exons in descending order
                exon_table = exon_table[order[::-1]]

            # "5prime" uses every exon but the last, otherwise every exon
            # but the first
            rows = slice(None, -1) if five_prime else slice(1, None)
            # second column for 5prime/"+" and non-5prime/other strand,
            # first column for the two remaining combinations
            if five_prime == forward:
                column = 1
                offsets = np.array([-self.overhang_l + 1, self.overhang_r])
            else:
                column = 0
                offsets = np.array([-self.overhang_r, self.overhang_l - 1])
            windows = exon_table[rows, column].reshape(-1, 1) + offsets

            for rank in range(windows.shape[0]):
                # the sequence itself can be fetched later in the iterator
                # (spliceSite.get_seq(self.fasta)) to save memory
                sites.append(SpliceSite(gene.chrom,
                                        windows[rank, 0],
                                        windows[rank, 1],
                                        gene.strand,
                                        tx.tranID,
                                        gene.geneID,
                                        gene.biotype,
                                        rank))
        return sites
Example 20
Project: pymoo   Author: msu-coinlab   File: reference_direction.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, n_dim, scaling=None, lexsort=True, verbose=False, seed=None, **kwargs) -> None:
        """Store the configuration options on the instance.

        Each named argument is saved unchanged as an attribute of the same
        name. ``**kwargs`` is accepted but not used by this constructor.
        """
        super().__init__()
        self.n_dim = n_dim
        self.scaling = scaling
        self.lexsort = lexsort
        self.verbose = verbose
        self.seed = seed
Example 21
Project: LanczosNetwork   Author: lrjconan   File: data_helper.py    License: MIT License 5 votes vote down vote up
def check_symmetric(m, tol=1e-8):
  """Return True when ``m`` equals its transpose within ``tol``.

  Parameters
  ----------
  m : dense array or scipy sparse matrix
  tol : float
    Absolute tolerance handed to ``np.allclose`` (its default relative
    tolerance still applies).

  Returns
  -------
  bool

  Raises
  ------
  ValueError
    If ``m`` is sparse and not square.

  Notes
  -----
  The sparse branch works on the stored entries: the strictly lower and
  strictly upper triangles must hold the same number of entries, at
  mirrored positions, with matching values.
  """
  if sp.issparse(m):
    if m.shape[0] != m.shape[1]:
      raise ValueError('m must be a square matrix')

    if not isinstance(m, sp.coo_matrix):
      m = sp.coo_matrix(m)

    r, c, v = m.row, m.col, m.data
    tril_no_diag = r > c
    triu_no_diag = c > r

    if triu_no_diag.sum() != tril_no_diag.sum():
      return False

    rl, cl, vl = r[tril_no_diag], c[tril_no_diag], v[tril_no_diag]
    ru, cu, vu = r[triu_no_diag], c[triu_no_diag], v[triu_no_diag]

    # Order the lower triangle by (row, col) and the upper one by
    # (col, row) so that entry (i, j) lines up with entry (j, i).
    sortl = np.lexsort((cl, rl))
    sortu = np.lexsort((ru, cu))

    # Equal values alone are not enough: the entries must also sit at
    # mirrored positions, otherwise e.g. a lone (1, 0) entry would be
    # matched against an unrelated (0, 2) entry of the same value.
    mirrored = (np.array_equal(rl[sortl], cu[sortu]) and
                np.array_equal(cl[sortl], ru[sortu]))
    if not mirrored:
      return False

    return np.allclose(vl[sortl], vu[sortu], atol=tol)
  else:
    return np.allclose(m, m.T, atol=tol)
Example 22
Project: gnocchi   Author: gnocchixyz   File: carbonara.py    License: Apache License 2.0 5 votes vote down vote up
def median(self):
        """Return a timeseries holding the median of every group.

        Values are sorted within each group (``self.indexes`` is the
        primary sort key, the value the secondary one). The two middle
        positions of each group are derived from ``self.counts``; they
        coincide for odd counts, and their mean is the median.
        """
        by_group_then_value = numpy.lexsort((self._ts['values'], self.indexes))
        sorted_values = self._ts['values'][by_group_then_value]
        # TODO(gordc): can use np.divmod when centos supports numpy 1.13
        half = numpy.floor_divide(self.counts, 2)
        is_odd = numpy.mod(self.counts, 2)
        # last position of each group, stepped back by half its size
        lower = (numpy.cumsum(self.counts) - 1) - half
        # even-sized groups use the following position as well
        upper = lower + (is_odd + 1) % 2
        middle_mean = (sorted_values[lower] + sorted_values[upper]) / 2.0
        return make_timeseries(self.tstamps, middle_mean)
Example 23
Project: gnocchi   Author: gnocchixyz   File: carbonara.py    License: Apache License 2.0 5 votes vote down vote up
def quantile(self, q):
        """Return a timeseries holding the ``q``-th percentile of every group.

        ``q`` is divided by 100, so it is expected on a 0-100 scale.
        Values are sorted within each group (``self.indexes`` is the
        primary sort key, the value the secondary one) and the result is
        linearly interpolated between the two neighbouring positions.
        """
        ordered = numpy.lexsort((self._ts['values'], self.indexes))
        sorted_values = self._ts['values'][ordered]
        # first position of each group, then the fractional target position
        min_pos = numpy.cumsum(self.counts) - self.counts
        real_pos = min_pos + (self.counts - 1) * (q / 100)
        # Builtin ``int`` replaces the ``numpy.int`` alias, which was
        # removed in NumPy 1.24; both denote the same integer dtype.
        floor_pos = numpy.floor(real_pos).astype(int, copy=False)
        ceil_pos = numpy.ceil(real_pos).astype(int, copy=False)
        values = (
            sorted_values[floor_pos] * (ceil_pos - real_pos) +
            sorted_values[ceil_pos] * (real_pos - floor_pos))
        # NOTE(gordc): above code doesn't compute proper value if pct lands on
        # exact index, it sets it to 0. we need to set it properly here
        exact_pos = numpy.equal(floor_pos, ceil_pos)
        values[exact_pos] = sorted_values[floor_pos][exact_pos]
        return make_timeseries(self.tstamps, values)
Example 24
Project: pyscf   Author: pyscf   File: geom.py    License: Apache License 2.0 5 votes vote down vote up
def argsort_coords(coords, decimals=None):
    """Return the indices that sort ``coords`` lexicographically.

    The coordinates are rounded to ``decimals`` places first (derived from
    ``TOLERANCE`` when not given). Column 0 is the primary sort key,
    column 1 the secondary and column 2 the last.
    """
    digits = decimals
    if digits is None:
        digits = int(-numpy.log10(TOLERANCE)) - 1
    rounded = numpy.around(coords, decimals=digits)
    x, y, z = rounded[:, 0], rounded[:, 1], rounded[:, 2]
    # lexsort takes the most significant key last
    return numpy.lexsort((z, y, x))
Example 25
Project: kaggle-code   Author: CNuge   File: tf_nn_classification_bad.py    License: MIT License 5 votes vote down vote up
def gini(actual, pred, cmpcol = 0, sortcol = 1):
	"""Compute the (un-normalised) Gini score of ``pred`` against ``actual``.

	Samples are ordered by descending ``pred``; ties keep their original
	order. The score is the sum of the cumulative ``actual`` values divided
	by their total, minus ``(n + 1) / 2``, all divided by ``n``.

	``cmpcol`` and ``sortcol`` are accepted but not used.
	"""
	assert( len(actual) == len(pred) )
	# Builtin ``float`` replaces the ``np.float`` alias, which was removed
	# in NumPy 1.24; both denote the same 64-bit float dtype.
	table = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=float)
	# primary key: descending prediction; secondary key: original position
	table = table[ np.lexsort((table[:,2], -1*table[:,1])) ]
	total_losses = table[:,0].sum()
	gini_sum = table[:,0].cumsum().sum() / total_losses

	gini_sum -= (len(actual) + 1) / 2.
	return gini_sum / len(actual)
Example 26
Project: kaggle-code   Author: CNuge   File: insurance_tf_nn_classification_upsample.py    License: MIT License 5 votes vote down vote up
def gini(actual, pred, cmpcol = 0, sortcol = 1):
	"""Compute the (un-normalised) Gini score of ``pred`` against ``actual``.

	Samples are ordered by descending ``pred``; ties keep their original
	order. The score is the sum of the cumulative ``actual`` values divided
	by their total, minus ``(n + 1) / 2``, all divided by ``n``.

	``cmpcol`` and ``sortcol`` are accepted but not used.
	"""
	assert( len(actual) == len(pred) )
	# Builtin ``float`` replaces the ``np.float`` alias, which was removed
	# in NumPy 1.24; both denote the same 64-bit float dtype.
	table = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=float)
	# primary key: descending prediction; secondary key: original position
	table = table[ np.lexsort((table[:,2], -1*table[:,1])) ]
	total_losses = table[:,0].sum()
	gini_sum = table[:,0].cumsum().sum() / total_losses

	gini_sum -= (len(actual) + 1) / 2.
	return gini_sum / len(actual)
Example 27
Project: kaggle-code   Author: CNuge   File: insurance_tf_nn_classification_downsample.py    License: MIT License 5 votes vote down vote up
def gini(actual, pred, cmpcol = 0, sortcol = 1):
	"""Compute the (un-normalised) Gini score of ``pred`` against ``actual``.

	Samples are ordered by descending ``pred``; ties keep their original
	order. The score is the sum of the cumulative ``actual`` values divided
	by their total, minus ``(n + 1) / 2``, all divided by ``n``.

	``cmpcol`` and ``sortcol`` are accepted but not used.
	"""
	assert( len(actual) == len(pred) )
	# Builtin ``float`` replaces the ``np.float`` alias, which was removed
	# in NumPy 1.24; both denote the same 64-bit float dtype.
	table = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=float)
	# primary key: descending prediction; secondary key: original position
	table = table[ np.lexsort((table[:,2], -1*table[:,1])) ]
	total_losses = table[:,0].sum()
	gini_sum = table[:,0].cumsum().sum() / total_losses

	gini_sum -= (len(actual) + 1) / 2.
	return gini_sum / len(actual)
Example 28
Project: kaggle-code   Author: CNuge   File: tf_nn_classification.py    License: MIT License 5 votes vote down vote up
def gini(actual, pred, cmpcol = 0, sortcol = 1):
	"""Compute the (un-normalised) Gini score of ``pred`` against ``actual``.

	Samples are ordered by descending ``pred``; ties keep their original
	order. The score is the sum of the cumulative ``actual`` values divided
	by their total, minus ``(n + 1) / 2``, all divided by ``n``.

	``cmpcol`` and ``sortcol`` are accepted but not used.
	"""
	assert( len(actual) == len(pred) )
	# Builtin ``float`` replaces the ``np.float`` alias, which was removed
	# in NumPy 1.24; both denote the same 64-bit float dtype.
	table = np.asarray(np.c_[ actual, pred, np.arange(len(actual)) ], dtype=float)
	# primary key: descending prediction; secondary key: original position
	table = table[ np.lexsort((table[:,2], -1*table[:,1])) ]
	total_losses = table[:,0].sum()
	gini_sum = table[:,0].cumsum().sum() / total_losses

	gini_sum -= (len(actual) + 1) / 2.
	return gini_sum / len(actual)
Example 29
Project: keras-yolo3   Author: bing0037   File: kmeans.py    License: MIT License 5 votes vote down vote up
def txt2clusters(self):
        """Cluster the boxes, save the cluster rows and print a summary.

        Boxes come from ``self.txt2boxes()`` and are clustered with
        ``self.kmeans`` into ``self.cluster_number`` clusters. The rows
        are sorted by their first column, passed to ``self.result2txt``
        and printed together with ``self.avg_iou`` as a percentage.
        """
        boxes = self.txt2boxes()
        clusters = self.kmeans(boxes, k=self.cluster_number)
        # a single lexsort key: order the rows by their first column
        by_first_column = np.lexsort((clusters[:, 0],))
        clusters = clusters[by_first_column]
        self.result2txt(clusters)
        print("K anchors:\n {}".format(clusters))
        accuracy = self.avg_iou(boxes, clusters) * 100
        print("Accuracy: {:.2f}%".format(accuracy))
Example 30
Project: recruit   Author: Frank-qlu   File: test_regression.py    License: Apache License 2.0 5 votes vote down vote up
def test_lexsort(self):
        # Lexsort memory error
        v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        assert_equal(np.lexsort(v), 0)