Python scipy.sparse.vstack() Examples

The following are 30 code examples showing how to use scipy.sparse.vstack(). They are extracted from open source projects; you can go to the original project or source file by following the links above each example.

You may also want to check out all available functions and classes of the module scipy.sparse.
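
Before the project examples, here is a minimal, self-contained sketch of what scipy.sparse.vstack() does (not taken from any project below): it stacks sparse matrices vertically, and the optional format argument controls the sparse format of the result.

import numpy as np
from scipy import sparse

a = sparse.csr_matrix(np.eye(2))                # 2 x 2 identity
b = sparse.csr_matrix(np.ones((3, 2)))          # 3 x 2 all-ones
stacked = sparse.vstack([a, b], format="csr")   # 5 x 2 CSR result
print(stacked.shape)  # (5, 2)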

Example 1
Project: EDeN   Author: fabriziocosta   File: ml.py   License: MIT License
def make_data_matrix(positive_data_matrix=None,
                     negative_data_matrix=None,
                     target=None):
    """make_data_matrix."""
    assert positive_data_matrix is not None, \
        'ERROR: expecting non-null positive_data_matrix'
    if negative_data_matrix is None:
        negative_data_matrix = positive_data_matrix.multiply(-1)
    if target is None and negative_data_matrix is not None:
        yp = [1] * positive_data_matrix.shape[0]
        yn = [-1] * negative_data_matrix.shape[0]
        y = np.array(yp + yn)
        data_matrix = vstack(
            [positive_data_matrix, negative_data_matrix], format="csr")
    if target is not None:
        data_matrix = positive_data_matrix
        y = target
    return data_matrix, y 
Example 2
Project: EDeN   Author: fabriziocosta   File: graph.py   License: MIT License
def auto_label(graphs, n_clusters=16, **opts):
    """Label nodes with cluster id.

    Cluster nodes using as features the output of vertex_vectorize.
    """
    data_list = Vectorizer(**opts).vertex_transform(graphs)
    data_matrix = vstack(data_list)
    clu = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clu.fit(data_matrix)
    preds = clu.predict(data_matrix)
    vecs = clu.transform(data_matrix)
    sizes = [m.shape[0] for m in data_list]
    label_list = []
    vecs_list = []
    pointer = 0
    for size in sizes:
        label_list.append(preds[pointer: pointer + size])
        vecs_list.append(vecs[pointer: pointer + size])
        pointer += size
    return label_list, vecs_list 
Example 3
Project: neuropythy   Author: noahbenson   File: core.py   License: GNU Affero General Public License v3.0
def apply_affine(aff, coords):
    '''
    apply_affine(affine, coords) yields the result of applying the given affine transformation to
      the given coordinate or coordinates.

    This function expects coords to be a (dims X n) matrix but if the first dimension is neither 2
    nor 3, coords.T is used; i.e.:
      apply_affine(affine3x3, coords2xN) ==> newcoords2xN
      apply_affine(affine4x4, coords3xN) ==> newcoords3xN
      apply_affine(affine3x3, coordsNx2) ==> newcoordsNx2 (for N != 2)
      apply_affine(affine4x4, coordsNx3) ==> newcoordsNx3 (for N != 3)
    '''
    if aff is None: return coords
    (coords,tr) = (np.asanyarray(coords), False)
    if len(coords.shape) == 1: return np.squeeze(apply_affine(aff, np.reshape(coords, (-1,1))))
    elif len(coords.shape) > 2: raise ValueError('cannot apply affine to ND-array for N > 2')
    if   len(coords) == 2: aff = to_affine(aff, 2)
    elif len(coords) == 3: aff = to_affine(aff, 3)
    else: (coords,aff,tr) = (coords.T, to_affine(aff, coords.shape[1]), True)
    r = np.dot(aff, np.vstack([coords, np.ones([1,coords.shape[1]])]))[:-1]
    return r.T if tr else r 
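
The homogeneous-coordinate step in the last two lines is easy to demonstrate on its own. A minimal sketch (not neuropythy code; the affine below is an arbitrary 2D translation) of applying a 3x3 affine to a 2 x N coordinate matrix:

import numpy as np

aff = np.array([[1., 0., 5.],
                [0., 1., -2.],
                [0., 0., 1.]])       # translate by (5, -2)
coords = np.array([[0., 1., 2.],
                   [0., 1., 2.]])    # 2 x 3: three 2D points
# append a row of ones, apply the affine, then drop the homogeneous row
r = np.dot(aff, np.vstack([coords, np.ones([1, coords.shape[1]])]))[:-1]
print(r)  # [[ 5.  6.  7.] [-2. -1.  0.]]
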
Example 4
Project: neuropythy   Author: noahbenson   File: core.py   License: GNU Affero General Public License v3.0
def curve_length(self, start=None, end=None, precision=0.01):
        '''
        Calculates the length of the curve by dividing the curve up
        into pieces of parameterized-length <precision>.
        '''
        if start is None: start = self.t[0]
        if end is None: end = self.t[-1]
        from scipy import interpolate
        if self.order == 1:
            # we just want to add up along the steps...
            ii = [ii for (ii,t) in enumerate(self.t) if start < t < end]
            ts = np.concatenate([[start], self.t[ii], [end]])
            xy = np.vstack([[self(start)], self.coordinates[:,ii].T, [self(end)]])
            return np.sum(np.sqrt(np.sum((xy[1:] - xy[:-1])**2, axis=1)))
        else:
            t = np.linspace(start, end, int(np.ceil((end-start)/precision)))
            dt = t[1] - t[0]
            dx = interpolate.splev(t, self.splrep[0], der=1)
            dy = interpolate.splev(t, self.splrep[1], der=1)
            return np.sum(np.sqrt(dx**2 + dy**2)) * dt 
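
The spline branch approximates arc length as the Riemann sum of sqrt(dx^2 + dy^2) * dt. A standalone sketch of the same computation (assumed setup, not neuropythy code) on a quarter circle, whose true length is pi/2 ~ 1.5708:

import numpy as np
from scipy import interpolate

t = np.linspace(0, np.pi / 2, 50)
tck_x = interpolate.splrep(t, np.cos(t))   # spline for x(t)
tck_y = interpolate.splrep(t, np.sin(t))   # spline for y(t)
tt = np.linspace(t[0], t[-1], int(np.ceil((t[-1] - t[0]) / 0.01)))
dt = tt[1] - tt[0]
dx = interpolate.splev(tt, tck_x, der=1)
dy = interpolate.splev(tt, tck_y, der=1)
print(np.sum(np.sqrt(dx ** 2 + dy ** 2)) * dt)  # close to pi/2 ~ 1.5708
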
Example 5
Project: neuropythy   Author: noahbenson   File: core.py   License: GNU Affero General Public License v3.0
def subcurve(self, t0, t1):
        '''
        curve.subcurve(t0, t1) yields a curve-spline object that is equivalent to the given
          curve but that extends from curve(t0) to curve(t1) only.
        '''
        # if t1 is less than t0, then we want to actually do this in reverse...
        if t1 == t0: raise ValueError('Cannot take subcurve of a point')
        if t1 < t0:
            tt = self.curve_length()
            return self.reverse().subcurve(tt - t0, tt - t1)
        idx = [ii for (ii,t) in enumerate(self.t) if t0 < t < t1]
        pt0 = self(t0)
        pt1 = self(t1)
        coords = np.vstack([[pt0], self.coordinates.T[idx], [pt1]])
        ts = np.concatenate([[t0], self.t[idx], [t1]])
        dists  = None if self.distances is None else np.diff(ts)
        return CurveSpline(
            coords.T,
            order=self.order,
            smoothing=self.smoothing,
            periodic=False,
            distances=dists,
            meta_data=self.meta_data) 
Example 6
Project: contextualbandits   Author: david-cortes   File: utils.py   License: BSD 2-Clause "Simplified" License
def get_batch(self, X, y):
        if self.curr == 0:
            self.add_obs(X, y)
            return X, y

        if (self.curr < self.n) and (isinstance(self.X_reserve, list)):
            if not self.has_sparse:
                old_X = np.concatenate(self.X_reserve, axis=0)
            else:
                old_X = sp_vstack(self.X_reserve)
            old_y = np.concatenate(self.y_reserve, axis=0)
        else:
            old_X = self.X_reserve[:self.curr].copy()
            old_y = self.y_reserve[:self.curr].copy()

        if X.shape[0] == 0:
            return old_X, old_y
        else:
            self.add_obs(X, y)

        if not issparse(old_X) and not issparse(X):
            return np.r_[old_X, X], np.r_[old_y, y]
        else:
            return sp_vstack([old_X, X]), np.r_[old_y, y] 
Example 7
Project: OpenNE   Author: thunlp   File: utils.py   License: MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
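
A quick usage sketch for the helper above (the input matrix is invented for illustration): np.vstack pairs the COO row and column index arrays into an (nnz, 2) coordinate array.

import numpy as np
import scipy.sparse as sp

mx = sp.csr_matrix(np.array([[0, 2], [3, 0]]))
coords, values, shape = sparse_to_tuple(mx)
print(coords)  # [[0 1] [1 0]]
print(values)  # [2 3]
print(shape)   # (2, 2)
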
Example 8
Project: dgi   Author: dfdazac   File: utils.py   License: MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example 9
def _concatenate_dense_jac(jac_list):
    # Read all Jacobians sequentially and convert their values to numpy arrays.
    jac_ineq_list = []
    jac_eq_list = []
    for jac_tuple in jac_list:
        J_ineq, J_eq = jac_tuple
        if spc.issparse(J_ineq):
            jac_ineq_list += [J_ineq.toarray()]
        else:
            jac_ineq_list += [np.atleast_2d(J_ineq)]
        if spc.issparse(J_eq):
            jac_eq_list += [J_eq.toarray()]
        else:
            jac_eq_list += [np.atleast_2d(J_eq)]
    # Concatenate all
    J_ineq = np.vstack(jac_ineq_list)
    J_eq = np.vstack(jac_eq_list)
    # Return
    return J_ineq, J_eq 
Example 10
Project: ektelo   Author: ektelo   File: selection.py   License: Apache License 2.0
def expand_offsets(cur_rect_l, cur_rect_u, offsets):
        '''
        Expand the offsets at each level along each dimension to generate the
        final offsets for all candidates, by summing each tuple in the cross
        product of the offset arrays.
        e.g. for one dimension, the two-level offsets [[0, 1, 0], [2, 4, 2]]
        expand to [2 4 2 3 5 3 2 4 2].
        cur_rect_l and cur_rect_u: coordinates of the lower and upper corners of the range.
        offsets: nested array of range offsets, indexed by dimension, then level of hierarchy.
        '''
        # remove empty list(no query at this level)
        offsets = [list(filter(lambda x: len(x) > 0, d)) for d in offsets]
        assert all([len(d) == len(offsets[0]) for d in offsets]),\
               "Shape of offsets along each dimension should match."    
        if len(offsets[0]) < 1:
            return [], []   
        # expand offsets across different levels.
        expanded_offsets = [HierarchicalRanges.quick_product(*d).sum(axis=0) for d in offsets] 
        lower = np.vstack([ l + offset for l, offset in zip(cur_rect_l, expanded_offsets)]).T
        upper = np.vstack([ u + offset for u, offset in zip(cur_rect_u, expanded_offsets)]).T
        return lower, upper 
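
The expansion described in the docstring is just the sums over the cross product of the per-level offset arrays, which plain numpy can reproduce. A hedged sketch (HierarchicalRanges.quick_product is ektelo's own helper; np.add.outer stands in for it here):

import numpy as np

level1 = np.array([0, 1, 0])
level2 = np.array([2, 4, 2])
expanded = np.add.outer(level1, level2).ravel()  # sum of each cross-product pair
print(expanded)  # [2 4 2 3 5 3 2 4 2]
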
Example 11
Project: ektelo   Author: ektelo   File: selection.py   License: Apache License 2.0
def select(self):
        QtQ = self.W.gram().dense_matrix()
        n = self.domain_shape[0]
        err, inv, weights, queries = self._GreedyHierByLv(
            QtQ, n, 0, withRoot=False)

        # form matrix from queries and weights
        row_list = []
        for q, w in zip(queries, weights):
            if w > 0:
                row = np.zeros(self.domain_shape[0])
                row[q[0]:q[1] + 1] = w
                row_list.append(row)
        mat = np.vstack(row_list)
        mat = mat if sparse.issparse(mat) else sparse.csr_matrix(mat)

        return matrix.EkteloMatrix(mat) 
Example 12
Project: pyxclib   Author: kunaldahiya   File: data_statistics.py   License: MIT License
def compute(self, train_features, train_labels, 
                test_features=None, test_labels=None):
        """Compute features for given data. Test data is optional.
        Args:
            train_features: csr_matrix: train features
            train_labels: csr_matrix: train labels
            test_features: csr_matrix: test features
            test_labels: csr_matrix: test labels
        """
        self.n_train_samples, self.n_features = train_features.shape
        self.n_labels = train_labels.shape[1] 
        if test_features is not None:
            self.n_test_samples = test_features.shape[0]
            features = vstack([train_features, test_features]).tocsr()
            labels = vstack([train_labels, test_labels]).tocsr()
        else:
            features = train_features
            labels = train_labels
        self.n_avg_samples_per_label = self.compute_avg_samples_per_label(labels)
        self.n_avg_labels_per_sample = self.compute_avg_labels_per_sample(labels)
        self.avg_doc_length = self.compute_avg_doc_length(features) 
Example 13
Project: recoder   Author: amoussawi   File: data.py   License: MIT License
def _extract(self, sparse_matrix, index):

    if sputils.issequence(index) and len(index) > CSR_MATRIX_INDEX_SIZE_LIMIT:
      # scipy implements indexing a csr_matrix with a list using matrix
      # multiplication, which becomes a problem when the index list is large
      # and can lead to memory issues
      # Reference: https://stackoverflow.com/questions/46034212/sparse-matrix-slicing-memory-error/46040827#46040827

      # In order to solve this issue, simply chunk the index into smaller indices of
      # size CSR_MATRIX_INDEX_SIZE_LIMIT and then stack the extracted chunks

      sparse_matrix_slices = []
      for offset in range(0, len(index), CSR_MATRIX_INDEX_SIZE_LIMIT):
        sparse_matrix_slices.append(sparse_matrix[index[offset: offset + CSR_MATRIX_INDEX_SIZE_LIMIT]])

      extracted_sparse_matrix = sparse.vstack(sparse_matrix_slices)
    else:
      extracted_sparse_matrix = sparse_matrix[index]

    return extracted_sparse_matrix 
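
The chunking pattern above is easy to exercise on its own. A small sketch with a made-up limit of 2 (the real CSR_MATRIX_INDEX_SIZE_LIMIT is a module constant in recoder):

import numpy as np
from scipy import sparse

LIMIT = 2  # stand-in for CSR_MATRIX_INDEX_SIZE_LIMIT
m = sparse.csr_matrix(np.arange(20).reshape(5, 4))
index = [4, 0, 3, 1, 2]
slices = [m[index[i: i + LIMIT]] for i in range(0, len(index), LIMIT)]
print(sparse.vstack(slices).toarray())  # rows of m in the order given by index
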
Example 14
Project: Wordbatch   Author: anttttti   File: batcher.py   License: GNU General Public License v2.0
def merge_batches(self, data):
		"""Merge a list of data minibatches into one single instance representing the data

		Parameters
		----------
		data: list
			List of minibatches to merge

		Returns
		-------
		(anonymous): sparse matrix | pd.DataFrame | list
			Single complete list-like data merged from given batches
		"""
		if isinstance(data[0], ssp.csr_matrix):  return ssp.vstack(data)
		if isinstance(data[0], pd.DataFrame) or isinstance(data[0], pd.Series):  return pd.concat(data)
		return [item for sublist in data for item in sublist] 
Example 15
Project: tf-gnn-samples   Author: microsoft   File: citation_network_utils.py   License: MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        # All of these will need to be sorted:
        sort_indices = np.lexsort(np.rot90(coords))
        return coords[sort_indices], values[sort_indices], shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example 16
Project: DGI   Author: PetarV-   File: process.py   License: MIT License
def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Convert sparse matrix to tuple representation."""
    """Set insert_batch=True if you want to insert a batch dimension."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        if insert_batch:
            coords = np.vstack((np.zeros(mx.row.shape[0]), mx.row, mx.col)).transpose()
            values = mx.data
            shape = (1,) + mx.shape
        else:
            coords = np.vstack((mx.row, mx.col)).transpose()
            values = mx.data
            shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example 17
def allMB_multi(casesPerTask,datasets,mbNumber):
    if mbNumber == 0:
        # to begin a new epoch, permute each dataset first, then sequentially use the training data in its new order
        for i in range(len(datasets)):
            datasets[i].perm()

    inpsList = []
    targs = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    targsMask = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    for i in range(len(datasets)):
        # in case we need to use some datasets multiple times in one epoch
        idx = [ xx % (datasets[i].inps.shape[0]) for xx in range(casesPerTask[i]*(mbNumber-1), casesPerTask[i]*mbNumber)]
        inpsList.append(datasets[i].inps[idx])
        targs[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)])] = datasets[i].targsFull[idx]
        targsMask[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)]), i] = 1
    if isinstance(inpsList[0], num.ndarray):
        inps = num.vstack(inpsList)
    else:
        inps = sp.vstack(inpsList)
    return inps, targs, targsMask 
Example 18
Project: TextCategorization   Author: SuyashLakhotia   File: data.py   License: MIT License
def _load(self):
        data_dir = os.path.abspath(os.path.join(os.path.curdir, "data", "RCV1", "pickles", "RCV1-v2_Sparse"))

        class_names = pickle.load(open(data_dir + "/class_names.pkl", "rb"))

        pkl_files = os.listdir(data_dir)
        docs_pkls = list(filter(lambda x: x.startswith("documents"), pkl_files))
        labels_pkls = list(filter(lambda x: x.startswith("labels"), pkl_files))
        docs_pkls.sort()
        labels_pkls.sort()

        documents = []
        for docs_pkl in docs_pkls:
            documents += pickle.load(open(data_dir + "/" + docs_pkl, "rb"))

        _labels = []
        for labels_pkl in labels_pkls:
            _labels += pickle.load(open(data_dir + "/" + labels_pkl, "rb"))
        labels = vstack(_labels)

        return documents, labels, class_names 
Example 19
Project: nonnegfac-python   Author: kimjingu   File: matrix_utils.py   License: BSD 3-Clause "New" or "Revised" License
def sparse_remove_row(X, to_remove):
    """ Delete rows from a sparse matrix

    Parameters
    ----------
    X : scipy.sparse matrix
    to_remove : a list of row indices to be removed.

    Returns
    -------
    Y : scipy.sparse matrix
    """
    if not sps.isspmatrix_lil(X):
        X = X.tolil()

    to_keep = [i for i in range(X.shape[0]) if i not in to_remove]
    Y = sps.vstack([X.getrowview(i) for i in to_keep])
    return Y 
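
Usage sketch for the function above (input invented for illustration): removing rows 1 and 3 from a 4 x 3 matrix leaves rows 0 and 2.

import numpy as np
import scipy.sparse as sps

X = sps.lil_matrix(np.arange(12).reshape(4, 3))
Y = sparse_remove_row(X, [1, 3])
print(Y.toarray())  # [[0 1 2] [6 7 8]]
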
Example 20
Project: modAL   Author: modAL-python   File: data.py   License: MIT License
def data_vstack(blocks: Container) -> modALinput:
    """
    Stack vertically both sparse and dense arrays.

    Args:
        blocks: Sequence of modALinput objects.

    Returns:
        New sequence of vertically stacked elements.
    """
    if isinstance(blocks[0], np.ndarray):
        return np.concatenate(blocks)
    elif isinstance(blocks[0], list):
        return list(chain(*blocks))
    elif sp.issparse(blocks[0]):
        return sp.vstack(blocks)
    else:
        try:
            return np.concatenate(blocks)
        except Exception:
            raise TypeError('%s datatype is not supported' % type(blocks[0])) 
Example 21
Project: modAL   Author: modAL-python   File: core_tests.py   License: MIT License
def test_data_vstack(self):
        for n_samples, n_features in product(range(1, 10), range(1, 10)):
            # numpy arrays
            a, b = np.random.rand(n_samples, n_features), np.random.rand(n_samples, n_features)
            np.testing.assert_almost_equal(
                modAL.utils.data.data_vstack((a, b)),
                np.concatenate((a, b))
            )

            # sparse matrices
            for format in ['lil', 'csc', 'csr']:
                a, b = sp.random(n_samples, n_features, format=format), sp.random(n_samples, n_features, format=format)
                self.assertEqual((modAL.utils.data.data_vstack((a, b)) != sp.vstack((a, b))).sum(), 0)

        # not supported formats
        self.assertRaises(TypeError, modAL.utils.data.data_vstack, (1, 1))

    # functions from modAL.utils.selection 
Example 22
Project: EDeN   Author: fabriziocosta   File: ml.py   License: MIT License
def multiprocess_vectorize(iterators,
                           vectorizer=None,
                           pre_processor=None,
                           pre_processor_args=None,
                           fit_flag=False,
                           n_blocks=5,
                           block_size=None,
                           n_jobs=8):
    """multiprocess_vectorize."""
    iterators = list(iterators)
    # fitting happens in a serial fashion
    if fit_flag:
        if pre_processor is not None:
            if pre_processor_args is not None:
                graphs = pre_processor(iterators, **pre_processor_args)
            else:
                graphs = pre_processor(iterators)
        else:
            graphs = iterators
        vectorizer.fit(graphs)
    size = len(iterators)
    intervals = compute_intervals(size=size,
                                  n_blocks=n_blocks,
                                  block_size=block_size)
    if n_jobs == -1:
        pool = mp.Pool()
    else:
        pool = mp.Pool(n_jobs)
    results = [apply_async(pool, serial_vectorize,
                           args=(iterators[start:end],
                                 vectorizer,
                                 pre_processor,
                                 pre_processor_args,
                                 False))
               for start, end in intervals]
    output = [p.get() for p in results]
    pool.close()
    pool.join()
    data_matrix = vstack(output, format="csr")
    return data_matrix 
Example 23
Project: neuropythy   Author: noahbenson   File: core.py   License: GNU Affero General Public License v3.0
def vstack(tup):
    '''
    vstack(x) is equivalent to numpy.vstack(x) or scipy.sparse.vstack(x) except that it works
      correctly with both sparse and dense arrays (if any inputs are dense, it converts all inputs
      to dense arrays).
    '''
    if all([sps.issparse(u) for u in tup]): return sps.vstack(tup, format=tup[0].format)
    else: return np.vstack([u.toarray() if sps.issparse(u) else u for u in tup]) 
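
A short demo of the fallback behavior (inputs invented for illustration): one dense input forces a dense result, while all-sparse inputs keep the first input's sparse format.

import numpy as np
import scipy.sparse as sps

a = sps.csr_matrix(np.eye(2))
b = np.ones((1, 2))
print(type(vstack([a, b])))  # numpy.ndarray (dense wins)
print(type(vstack([a, a])))  # a csr_matrix (all inputs sparse)
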
Example 24
Project: neuropythy   Author: noahbenson   File: core.py   License: GNU Affero General Public License v3.0
def repmat(x, r, c):
    '''
    repmat(x, r, c) is equivalent to numpy.matlib.repmat(x, r, c) except that it works correctly for
      sparse matrices.
    '''
    if sps.issparse(x):
        row = sps.hstack([x for _ in range(c)])
        return sps.vstack([row for _ in range(r)], format=x.format)
    else: return np.matlib.repmat(x, r, c) 
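
Usage sketch (input invented): tiling a sparse 1 x 2 matrix into a 2 x 3 grid of copies gives a 2 x 6 sparse matrix.

import numpy as np
import scipy.sparse as sps

x = sps.csr_matrix(np.array([[1, 2]]))
print(repmat(x, 2, 3).toarray())
# [[1 2 1 2 1 2]
#  [1 2 1 2 1 2]]
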
Example 25
Project: scanorama   Author: brianhie   File: scanorama.py   License: MIT License
def dimensionality_reduce(datasets, dimred=DIMRED):
    X = vstack(datasets)
    X = reduce_dimensionality(X, dim_red_k=dimred)
    datasets_dimred = []
    base = 0
    for ds in datasets:
        datasets_dimred.append(X[base:(base + ds.shape[0]), :])
        base += ds.shape[0]
    return datasets_dimred

# Normalize and reduce dimensionality. 
Example 26
Project: scanorama   Author: brianhie   File: scanorama.py   License: MIT License
def process_data(datasets, genes, hvg=HVG, dimred=DIMRED, verbose=False):
    # Only keep highly variable genes
    if hvg is not None and 0 < hvg < len(genes):
        if verbose:
            print('Highly variable filter...')
        X = vstack(datasets)
        disp = dispersion(X)
        highest_disp_idx = np.argsort(disp[0])[::-1]
        top_genes = set(genes[highest_disp_idx[range(hvg)]])
        for i in range(len(datasets)):
            gene_idx = [ idx for idx, g_i in enumerate(genes)
                         if g_i in top_genes ]
            datasets[i] = datasets[i][:, gene_idx]
        genes = np.array(sorted(top_genes))

    # Normalize.
    if verbose:
        print('Normalizing...')
    for i, ds in enumerate(datasets):
        datasets[i] = normalize(ds, axis=1)

    # Compute compressed embedding.
    if dimred > 0:
        if verbose:
            print('Reducing dimension...')
        datasets_dimred = dimensionality_reduce(datasets, dimred=dimred)
        if verbose:
            print('Done processing.')
        return datasets_dimred, genes

    if verbose:
        print('Done processing.')

    return datasets, genes

# Plot t-SNE visualization. 
Example 27
Project: deep_architect   Author: negrinho   File: hashing.py   License: MIT License
def _refit(self):
        if self.model is None:
            self.model = lm.Ridge(alpha=self.weight_decay_coeff)

        X = sp.vstack(self.vecs_lst, format='csr')
        y = np.array(self.vals_lst)
        self.model.fit(X, y)

    # TODO: improve 
Example 28
Project: scVI   Author: YosefLab   File: dataset.py   License: MIT License
def concatenate_arrays(arrays):
    # concatenate data
    if all(isinstance(array, np.ndarray) for array in arrays):
        concatenation = np.concatenate(arrays)
    # if sparse, cast all to sparse and stack
    else:
        concatenation = sp_sparse.vstack(
            [
                array
                if isinstance(array, sp_sparse.csr_matrix)
                else sp_sparse.csr_matrix(array)
                for array in arrays
            ]
        )
    return concatenation 
Example 29
Project: libact   Author: ntucllab   File: dataset.py   License: BSD 2-Clause "Simplified" License
def append(self, feature, label=None):
        """
        Add a (feature, label) entry into the dataset.
        A None label indicates an unlabeled entry.

        Parameters
        ----------
        feature : {array-like}, shape = (n_features)
            Feature of the sample to append to dataset.

        label : {int, None}
            Label of the sample to append to dataset. None if unlabeled.

        Returns
        -------
        entry_id : {int}
            entry_id for the appended sample.
        """
        if isinstance(self._X, np.ndarray):
            self._X = np.vstack([self._X, feature])
        else: # sp.csr_matrix
            self._X = sp.vstack([self._X, feature])
        self._y = np.append(self._y, label)

        self.modified = True
        return len(self) - 1 
Example 30
Project: scattertext   Author: JasonKessler   File: CorpusBasedTermScorer.py   License: Apache License 2.0
def _get_cat_and_ncat(self, X):
        if self.category_name_is_set_ is False:
            raise NeedToSetCategoriesException()
        cat_X = X[np.isin(self.corpus_.get_category_names_by_row(),
                          [self.category_name] + self.neutral_category_names), :]
        ncat_X = X[np.isin(self.corpus_.get_category_names_by_row(),
                           self.not_category_names + self.neutral_category_names), :]
        if len(self.neutral_category_names) > 0:
            neut_X = X[np.isin(self.corpus_.get_category_names_by_row(), self.neutral_category_names), :]
            cat_X = vstack([cat_X, neut_X])
            ncat_X = vstack([ncat_X, neut_X])
        return cat_X, ncat_X