Python scipy.sparse.hstack() Examples

The following are 30 code examples of scipy.sparse.hstack(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: pipeline.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def transform(self, X):
        """Apply every transformer to X and stack the outputs column-wise.

        Parameters
        ----------
        X : iterable or array-like, depending on transformers
            Input data handed unchanged to each transformer.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            Horizontally stacked transformer outputs. The result is a CSR
            matrix as soon as one output is sparse, otherwise a dense array.
            An array with ``X.shape[0]`` rows and zero columns is returned
            when there is nothing to stack.
        """
        outputs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(trans, X, None, weight)
            for _, trans, weight in self._iter())
        if not outputs:
            # No transformer produced anything.
            return np.zeros((X.shape[0], 0))
        if any(sparse.issparse(block) for block in outputs):
            return sparse.hstack(outputs).tocsr()
        return np.hstack(outputs)
Example #2
Source File: wordbatch_model.py    From mercari-price-suggestion with MIT License 6 votes vote down vote up
def predict(self, df):
        """Vectorise the columns of ``df`` and return the model's predictions.

        Every vectorizer stored on ``self`` transforms its own column; the
        resulting blocks are stacked horizontally into one CSR matrix that is
        passed to ``self.model.predict``.

        Note: a ``cat_brand`` column is written into ``df`` as a side effect.
        """
        desc_block = self.wb_desc.transform(df["item_description"])
        desc_block = desc_block[:, self.desc_indices]

        # The two name encodings enter the model with fixed scaling factors.
        name_block = 2 * self.cv_name.transform(df["name"])
        name2_block = 0.5 * self.cv_name2.transform(df["name"])

        cat0_block = self.cv_cat0.transform(df['subcat_0'])
        cat1_block = self.cv_cat1.transform(df['subcat_1'])
        cat2_block = self.cv_cat2.transform(df['subcat_2'])
        brand_block = self.cv_brand.transform(df['brand_name'])

        # Condition and shipping are folded into a single string-valued key.
        condition_key = (df['item_condition_id'] + 10 * df["shipping"]).apply(str)
        condition_block = self.cv_condition.transform(condition_key)

        df["cat_brand"] = [
            category + " " + brand
            for category, brand in zip(df["category_name"], df["brand_name"])
        ]
        cat_brand_block = self.cv_cat_brand.transform(df["cat_brand"])
        desc3_block = self.desc3.transform(df["item_description"])

        blocks = (
            condition_block,
            desc_block, brand_block,
            cat0_block, cat1_block, cat2_block,
            name_block, name2_block,
            cat_brand_block, desc3_block,
        )
        return self.model.predict(hstack(blocks).tocsr())
Example #3
Source File: categorical.py    From Kaggler with MIT License 6 votes vote down vote up
def transform(self, X):
        """Encode categorical columns into sparse matrix with one-hot-encoding.

        Columns for which ``self._transform_col`` returns ``None`` are
        skipped. The result is seeded with the first column that yields an
        encoding, so a skipped first column no longer breaks the stacking.

        Args:
            X (pandas.DataFrame): categorical columns to encode

        Returns:
            sparse matrix encoding the categorical variables into dummy
            variables: the single encoded block when only one column
            produced output, the ``sparse.hstack`` of all blocks otherwise,
            or ``None`` when no column produced any output.
        """
        X_new = None

        for i, col in enumerate(X.columns):
            X_col = self._transform_col(X[col], i)
            if X_col is not None:
                if X_new is None:
                    X_new = X_col
                else:
                    X_new = sparse.hstack((X_new, X_col))

            logger.debug('{} --> {} features'.format(
                col, self.label_encoder.label_maxes[i])
            )

        return X_new
Example #4
Source File: designmatrix.py    From lightkurve with MIT License 6 votes vote down vote up
def append_constant(self, prior_mu=0, prior_sigma=np.inf, inplace=False):
        """Append a column of ones to the design matrix.

        Parameters
        ----------
        prior_mu : value appended to ``prior_mu`` for the new column.
        prior_sigma : value appended to ``prior_sigma`` for the new column.
        inplace : bool
            If True, modify and return this object; otherwise work on the
            result of ``self.copy()``.

        Returns
        -------
        `.SparseDesignMatrix`
            Design matrix with a column of ones appended, stored in LIL
            format. This column is named "offset".
        """
        target = self if inplace else self.copy()
        current = target.X
        # lil_matrix of a 1-D array is a single row; transpose it to a column.
        ones_column = lil_matrix(np.ones(target.shape[0])).T
        target._X = hstack([current, ones_column], format='lil')
        target.prior_mu = np.append(target.prior_mu, prior_mu)
        target.prior_sigma = np.append(target.prior_sigma, prior_sigma)
        return target
Example #5
Source File: designmatrix.py    From lightkurve with MIT License 6 votes vote down vote up
def __init__(self, matrices):
        """Build the collection from ``matrices`` and stack their ``X`` as CSR.

        When at least one member holds a non-sparse ``X`` a
        `LightkurveWarning` is issued and every `DesignMatrix` instance is
        replaced by ``copy().to_sparse()``; other members are kept as they are.
        """
        if np.all([issparse(m.X) for m in matrices]):
            self.matrices = matrices
        else:
            # This collection is designed for sparse matrices, so warn when a
            # dense DesignMatrix is passed.
            warnings.warn(('Not all matrices are `SparseDesignMatrix` objects. '
                            'Dense matrices will be converted to sparse matrices.'), LightkurveWarning)
            self.matrices = [
                m.copy().to_sparse() if isinstance(m, DesignMatrix) else m
                for m in matrices
            ]
        self.X = hstack([m.X for m in self.matrices], format='csr')
        self._child_class = SparseDesignMatrix
        self.validate()
Example #6
Source File: core.py    From neuropythy with GNU Affero General Public License v3.0 6 votes vote down vote up
def to_curve_spline(obj):
    '''
    to_curve_spline(obj) yields obj if obj is a curve spline and otherwise attempts to coerce obj
      into a curve spline, raising an error if it cannot.

    The argument may be a 2-tuple (coordinates, options) or just the coordinates, where the
    coordinates are a matrix, a curve spline, or a sequence of these. Options not given explicitly
    are derived from any curve splines found among the coordinates:
      * 'weights' is the concatenation of the weights, used only when every entry is a curve
        spline with non-None weights;
      * 'order' is the minimum order of the splines;
      * 'smoothing' is the shared smoothing value, or None if the splines disagree.
    Every coordinate array is oriented to have 2 rows before all of them are stacked horizontally
    and passed to curve_spline.
    '''
    if   is_curve_spline(obj):            return obj
    elif is_tuple(obj) and len(obj) == 2: (crds,opts) = obj
    else:                                 (crds,opts) = (obj,{})
    if pimms.is_matrix(crds) or is_curve_spline(crds): crds = [crds]
    spls = [c for c in crds if is_curve_spline(c)]
    opts = dict(opts)
    if 'weights' not in opts and len(spls) == len(crds):
        if all(c.weights is not None for c in crds):
            opts['weights'] = np.concatenate([c.weights for c in crds])
    if 'order' not in opts and len(spls) > 0:
        opts['order'] = np.min([c.order for c in spls])
    if 'smoothing' not in opts and len(spls) > 0:
        sm = set([c.smoothing for c in spls])
        if len(sm) == 1: opts['smoothing'] = list(sm)[0]
        else: opts['smoothing'] = None
    # Test each element x, not the enclosing list: crds is a list at this point, so checking it
    # would never select the .crds branch and splines would be pushed through np.asarray.
    crds = [x.crds if is_curve_spline(x) else np.asarray(x) for x in crds]
    crds = [x if x.shape[0] == 2 else x.T for x in crds]
    crds = np.hstack(crds)
    return curve_spline(crds, **opts)
Example #7
Source File: designmatrix.py    From lightkurve with MIT License 6 votes vote down vote up
def plot(self, ax=None, **kwargs):
        """Visualize the design matrix values as an image.

        The ``X`` matrices of all members are stacked horizontally into a
        temporary `SparseDesignMatrix`, whose own ``plot`` method draws the
        image.

        Parameters
        ----------
        ax : `~matplotlib.axes.Axes`
            A matplotlib axes object to plot into. If no axes is provided,
            a new one will be created.
        **kwargs : dict
            Extra parameters forwarded to the temporary matrix's ``plot``.

        Returns
        -------
        `~matplotlib.axes.Axes`
            The matplotlib axes object.
        """
        temp_dm = SparseDesignMatrix(hstack([d.X for d in self]))
        # Forward `ax` so a caller-supplied axes is actually used; it was
        # previously accepted but never passed on.
        ax = temp_dm.plot(ax=ax, **kwargs)
        ax.set_title("Design Matrix Collection")
        return ax
Example #8
Source File: designmatrix.py    From lightkurve with MIT License 6 votes vote down vote up
def __init__(self, matrices):
        """Build the collection from ``matrices`` and stack their ``X`` densely.

        When at least one member holds a sparse ``X`` a `LightkurveWarning`
        is issued and every `SparseDesignMatrix` instance is replaced by
        ``copy().to_dense()``; other members are kept as they are.
        """
        if not np.any([issparse(m.X) for m in matrices]):
            self.matrices = matrices
        else:
            # This collection is designed for dense matrices, so warn when a
            # SparseDesignMatrix is passed.
            warnings.warn(('Some matrices are `SparseDesignMatrix` objects. '
                           'Sparse matrices will be converted to dense matrices.'),
                          LightkurveWarning)
            self.matrices = [
                m.copy().to_dense() if isinstance(m, SparseDesignMatrix) else m
                for m in matrices
            ]
        self.X = np.hstack(tuple(m.X for m in self.matrices))
        self._child_class = DesignMatrix
        self.validate()
Example #9
Source File: featurizer.py    From snips-nlu with Apache License 2.0 6 votes vote down vote up
def fit_transform(self, dataset, utterances, classes, none_class):
        """Fit the vectorizers on ``utterances`` and return their features.

        The dataset is validated first and its language stored on ``self``.
        The tf-idf features are returned alone unless
        ``self.config.added_cooccurrence_feature_ratio`` is truthy, in which
        case the co-occurrence vectorizer is fitted as well and its features
        are stacked to the right of the tf-idf ones.

        Raises ``_EmptyDatasetUtterancesError`` when no utterance yields any
        token.
        """
        import scipy.sparse as sp

        dataset = validate_and_format_dataset(dataset)
        self.language = dataset[LANGUAGE]

        has_tokens = any(
            tokenize_light(get_text_from_chunks(u[DATA]), self.language)
            for u in utterances)
        if not has_tokens:
            raise _EmptyDatasetUtterancesError(
                "Tokenized utterances are empty")

        x_tfidf = self._fit_transform_tfidf_vectorizer(
            utterances, classes, dataset)
        if not self.config.added_cooccurrence_feature_ratio:
            return x_tfidf

        self._fit_cooccurrence_vectorizer(
            utterances, classes, none_class, dataset)
        x_cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
        return sp.hstack((x_tfidf, x_cooccurrence))
Example #10
Source File: features_generation_tools.py    From corpus-to-graph-ml with MIT License 6 votes vote down vote up
def get_compound_features(train_data, test_data, feature_gen_methods):
    """Run every feature generator and stack the results column-wise.

    Each callable in ``feature_gen_methods`` is called as
    ``m(train_data, test_data)`` and must return a
    ``(train_features, test_features)`` pair.

    Returns the pair ``(train_features, test_features)``. With a single
    generator its outputs are returned untouched; with several, the train
    blocks and the test blocks are each combined with one ``hstack`` call.

    Raises ``IndexError`` when ``feature_gen_methods`` is empty.
    """
    train_features_list = []
    test_features_list = []

    for m in feature_gen_methods:
        train_features, test_features = m(train_data, test_data)
        train_features_list.append(train_features)
        test_features_list.append(test_features)

    train_features = train_features_list[0]
    test_features = test_features_list[0]

    # A single hstack replaces the former xrange loop (Python 2 only) that
    # re-stacked the accumulated matrix once per generator.
    if len(train_features_list) > 1:
        train_features = hstack(train_features_list)
        test_features = hstack(test_features_list)

    return train_features, test_features
Example #11
Source File: loader_nfm.py    From knowledge_graph_attention_network with MIT License 6 votes vote down vote up
def generate_train_batch(self):
        """Return the sparse positive and negative feature matrices of a batch.

        The batch comes from ``self._generate_train_cf_batch()``. The user
        one-hot rows are stacked to the left of the item feature rows, once
        for the positive and once for the negative items. The result is a
        dict with the keys ``'pos_feats'`` and ``'neg_feats'``.
        """
        users, pos_items, neg_items = self._generate_train_cf_batch()

        user_rows = self.user_one_hot[users]
        pos_rows = self.kg_feat_mat[pos_items]
        neg_rows = self.kg_feat_mat[neg_items]

        # Same user block on the left, item block on the right.
        return {
            'pos_feats': sp.hstack([user_rows, pos_rows]),
            'neg_feats': sp.hstack([user_rows, neg_rows]),
        }
Example #12
Source File: feature_expansion.py    From KDDCup2019_admin with MIT License 6 votes vote down vote up
def cat_onehot_encoder_m(df,y,col,selection=True):
    """Multi-hot encode ``df.values`` and optionally score the encoding.

    A ``MultiLabelBinarizer`` with sparse output is fitted on ``df.values``
    and used to transform the same values into a float CSR matrix.

    Args:
        df: object exposing ``.values``, the rows to encode.
        y: target passed to ``train_lightgbm_for_feature_selection``.
        col: label of the encoded column, used only in the printed message.
        selection: scoring runs only when this is exactly ``True``.

    Returns:
        Tuple ``(new_feature, mlbs, models, auc_score)``: the encoded CSR
        matrix, the fitted binarizer, and the models and score returned by
        the selection step (both ``None`` when it is skipped).
    """
    from scipy.sparse import csr_matrix

    mlbs = MultiLabelBinarizer(sparse_output=True).fit(df.values)
    # csr_matrix(...) already yields CSR, so no further conversion is needed.
    new_feature = csr_matrix(mlbs.transform(df.values), dtype=float)

    models = None
    auc_score = None
    # Identity check kept on purpose: truthy values other than True skip it.
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(new_feature, y)
        print(col, "auc", auc_score)

    return new_feature,mlbs,models,auc_score
Example #13
Source File: feature_for_test.py    From KDDCup2019_admin with MIT License 6 votes vote down vote up
def multi_features_for_test(df,columns,mlbs,models):
    """Encode every column that has a fitted binarizer and stack the results.

    For each name in ``columns`` that is a key of ``mlbs``,
    ``multi_feature_for_one_col`` is called with ``df[col]`` and the matching
    binarizer. The outputs are stacked horizontally, in the order of
    ``columns``, into one float sparse matrix. ``models`` is accepted but
    not used: ``None`` is passed as the model for every column.
    """
    from scipy.sparse import hstack

    encoded = {}
    for col in columns:
        if col not in mlbs:
            continue
        encoded[col] = multi_feature_for_one_col(df[col], mlbs[col], None, col)

    blocks = [encoded[col] for col in columns if col in encoded]
    return hstack(blocks, dtype=float)
Example #14
Source File: pandas_feature_union.py    From pandas-feature-union with MIT License 6 votes vote down vote up
def fit_transform(self, X, y=None, **fit_params):
        """Fit all transformers on X, transform it and combine the outputs.

        The fitted transformers replace the stored ones through
        ``self._update_transformer_list``. Returns a CSR matrix when any
        output is sparse, otherwise the result of
        ``self.merge_dataframes_by_column``; an array with ``X.shape[0]``
        rows and zero columns is returned when there is nothing to combine.
        """
        self._validate_transformers()
        fitted = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_transform_one)(
                transformer=trans,
                X=X,
                y=y,
                weight=weight,
                **fit_params)
            for _, trans, weight in self._iter())
        if not fitted:
            # No transformer produced anything.
            return np.zeros((X.shape[0], 0))

        Xs, transformers = zip(*fitted)
        self._update_transformer_list(transformers)
        if any(sparse.issparse(block) for block in Xs):
            return sparse.hstack(Xs).tocsr()
        return self.merge_dataframes_by_column(Xs)
Example #15
Source File: pandas_feature_union.py    From pandas-feature-union with MIT License 6 votes vote down vote up
def transform(self, X):
        """Transform X with every transformer and combine the outputs.

        Returns a CSR matrix when any output is sparse, otherwise the result
        of ``self.merge_dataframes_by_column``; an array with ``X.shape[0]``
        rows and zero columns is returned when there is nothing to combine.
        """
        outputs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(
                transformer=trans,
                X=X,
                y=None,
                weight=weight)
            for _, trans, weight in self._iter())
        if not outputs:
            # No transformer produced anything.
            return np.zeros((X.shape[0], 0))
        if any(sparse.issparse(block) for block in outputs):
            return sparse.hstack(outputs).tocsr()
        return self.merge_dataframes_by_column(outputs)
Example #16
Source File: backend.py    From mlens with MIT License 6 votes vote down vote up
def _propagate_features(self, task):
        """Copy the propagated input features into the output array.

        Dense input is written in place into the first
        ``task.n_feature_prop`` columns of ``self.job.predict_out``. Sparse
        input instead rebuilds ``self.job.predict_out`` as a LIL matrix made
        of the propagated columns followed by the remaining output columns.
        """
        p_out, p_in = self.job.predict_out, self.job.predict_in

        # The output may hold fewer rows than the input (e.g. with
        # blendindex); the surplus leading input rows are skipped.
        r = int(p_in.shape[0] - p_out.shape[0])

        if issparse(p_in):
            # Item assignment is avoided here; stack the pieces instead.
            propagated = p_in[r:, task.propagate_features]
            remainder = p_out[:, task.n_feature_prop:]
            self.job.predict_out = hstack([propagated, remainder]).tolil()
        else:
            p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]
Example #17
Source File: operator_utils.py    From grove with Apache License 2.0 6 votes vote down vote up
def __init__(self, labels_ops):
        """
        Encapsulates a set of linearly independent operators.

        :param (list|tuple) labels_ops: Sequence of tuples (label, operator) where label is a string
            and operator a qutip.Qobj operator representation.
        """
        self.ops_by_label = OrderedDict(labels_ops)
        self.labels = list(self.ops_by_label)
        self.ops = [self.ops_by_label[label] for label in self.labels]
        self.dim = len(self.ops)

        # Basis change from the operator basis to the original basis: one
        # vectorised operator per column. CSR is enforced to keep
        # matrix-vector products efficient.
        columns = [qt.operator_to_vector(opj).data for opj in self.ops]
        self.basis_transform = sphstack(columns).tocsr()

        # Remaining attributes start out unset.
        self._metric = None
        self._is_orthonormal = None
        self._all_hermitian = None
Example #18
Source File: xc_metrics.py    From pyxclib with MIT License 6 votes vote down vote up
def _setup_metric(X, true_labels, inv_psp=None, k=5):
    """Prepare the inputs shared by the metric computations.

    Returns ``(indices, true_labels, ps_indices, inv_psp)`` where

    * ``indices`` is ``_get_topk(X, num_labels, k)``;
    * ``true_labels`` is the input with one extra all-zero int32 column
      appended, as a CSR matrix;
    * ``ps_indices`` is the top-k of ``true_labels`` scaled column-wise by
      ``inv_psp``, or ``None`` when ``inv_psp`` is not given;
    * ``inv_psp`` has a single zero appended, or stays ``None``.

    Asserts that ``X`` and ``true_labels`` have compatible shapes.
    """
    assert compatible_shapes(X, true_labels), \
        "ground truth and prediction matrices must have same shape."
    num_instances, num_labels = true_labels.shape
    indices = _get_topk(X, num_labels, k)

    if inv_psp is None:
        ps_indices = None
    else:
        scaling = sp.spdiags(inv_psp, diags=0,
                             m=num_labels, n=num_labels)
        ps_indices = _get_topk(true_labels.dot(scaling), num_labels, k)
        inv_psp = np.hstack([inv_psp, np.zeros((1))])

    padding = sp.lil_matrix((num_instances, 1), dtype=np.int32)
    true_labels = sp.hstack([true_labels, padding]).tocsr()
    return indices, true_labels, ps_indices, inv_psp
Example #19
Source File: loader_nfm.py    From knowledge_graph_attention_network with MIT License 6 votes vote down vote up
def generate_test_feed_dict(self, model, user_batch, item_batch, drop_flag=True):
        """Build the feed dict scoring every (user, item) pair of the batches.

        Each user is paired with each item: user one-hot rows are stacked to
        the left of item feature rows, and the indices, values and shape
        obtained from ``self._extract_sp_info`` are keyed by the matching
        ``model`` placeholders. Message dropout is fed as one ``0.`` per
        entry of ``self.args.layer_size``. ``drop_flag`` is not used.
        """
        n_users, n_items = len(user_batch), len(item_batch)
        # user-major pairing: u0 with all items, then u1 with all items, ...
        user_list = np.repeat(user_batch, n_items).tolist()
        item_list = list(item_batch) * n_users

        pair_feats = sp.hstack([self.user_one_hot[user_list],
                                self.kg_feat_mat[item_list]])
        pos_indices, pos_values, pos_shape = self._extract_sp_info(pair_feats)

        # NOTE(review): eval() runs arbitrary code from args.layer_size;
        # consider ast.literal_eval if that value can come from outside.
        return {
            model.pos_indices: pos_indices,
            model.pos_values: pos_values,
            model.pos_shape: pos_shape,

            model.mess_dropout: [0.] * len(eval(self.args.layer_size))
        }
Example #20
Source File: feature_union.py    From Wordbatch with GNU General Public License v2.0 6 votes vote down vote up
def transform(self, X):
		"""Run every transformer on X and optionally concatenate the outputs.

		Parameters
		----------
		X : iterable or array-like, depending on transformers
			Input data to be transformed.

		Returns
		-------
		X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
			With ``self.concatenate`` set: hstack of the transformer
			outputs, as CSR when any output is sparse. Otherwise the
			outputs are returned as produced, without stacking.
		"""
		jobs = []
		for _, trans, _ in self._iter():
			# NOTE(review): hasattr() checks an attribute while the value is
			# read by subscript; confirm both refer to the same 'col_pick'.
			data = X[trans['col_pick']] if hasattr(trans, 'col_pick') else X
			jobs.append([data, trans])
		outputs = Apply(transform_one, self.batcher).transform(jobs)
		if not outputs:
			# No transformer produced anything.
			return np.zeros((X.shape[0], 0))
		if not self.concatenate:
			return outputs
		if any(sparse.issparse(block) for block in outputs):
			return sparse.hstack(outputs).tocsr()
		return np.hstack(outputs)
Example #21
Source File: designmatrix.py    From lightkurve with MIT License 5 votes vote down vote up
def values(self):
        """Return the ``values`` of all member matrices stacked side by side."""
        blocks = [m.values for m in self.matrices]
        return np.hstack(blocks)
Example #22
Source File: kernel_approximation.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def _transform_sparse(self, X):
        """Expand a CSR matrix into ``2 * sample_steps - 1`` stacked blocks.

        Every block has the shape and sparsity pattern of ``X``. The first
        holds ``sqrt(x * sample_interval_)``; each further step ``j`` adds a
        cosine and a sine block computed from the stored values of ``X``.
        The blocks are stacked horizontally.
        """
        indices = X.indices.copy()
        indptr = X.indptr.copy()

        def as_csr(values):
            # Each block reuses the copied index arrays of X.
            return sp.csr_matrix((values, indices, indptr),
                                 shape=X.shape, dtype=X.dtype, copy=False)

        blocks = [as_csr(np.sqrt(X.data * self.sample_interval_))]

        log_step_nz = self.sample_interval_ * np.log(X.data)
        step_nz = 2 * X.data * self.sample_interval_

        for j in range(1, self.sample_steps):
            factor_nz = np.sqrt(step_nz /
                                np.cosh(np.pi * j * self.sample_interval_))
            blocks.append(as_csr(factor_nz * np.cos(j * log_step_nz)))
            blocks.append(as_csr(factor_nz * np.sin(j * log_step_nz)))

        return sp.hstack(blocks)
Example #23
Source File: featurizer.py    From snips-nlu with Apache License 2.0 5 votes vote down vote up
def transform(self, utterances):
        """Return the tf-idf features of ``utterances``.

        When a co-occurrence vectorizer is set, its features are stacked to
        the right of the tf-idf ones.
        """
        import scipy.sparse as sp

        x_tfidf = self.tfidf_vectorizer.transform(utterances)
        if not self.cooccurrence_vectorizer:
            return x_tfidf
        x_cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
        return sp.hstack((x_tfidf, x_cooccurrence))
Example #24
Source File: matrix.py    From ektelo with Apache License 2.0 5 votes vote down vote up
def dense_matrix(self):
        """Return the dense form of every member matrix, stacked horizontally."""
        dense_blocks = [Q.dense_matrix() for Q in self.matrices]
        return np.hstack(dense_blocks)
Example #25
Source File: longitudinal_features_product.py    From tick with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _dense_finite_product(self, feat_mat):
        """Append pairwise column products to a numpy.ndarray of finite
        exposures.

        One product column is added for every ``(i, j)`` pair found in
        ``self._mapper.values()``; the original columns come first.
        """
        products = [
            (feat_mat[:, i] * feat_mat[:, j]).reshape((-1, 1))
            for i, j in self._mapper.values()
        ]
        return np.hstack([feat_mat] + products)
Example #26
Source File: longitudinal_features_product.py    From tick with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _sparse_finite_product(self, feat_mat):
        """Append pairwise column products to a scipy.sparse.csr_matrix of
        finite exposures.

        One product column is added for every ``(i, j)`` pair found in
        ``self.mapper.values()``; the original columns come first and the
        result is returned in CSR format.
        """
        base = feat_mat.tocsc()
        # NOTE(review): the dense counterpart reads ``self._mapper``; confirm
        # that ``self.mapper`` is the intended attribute here.
        products = [
            feat_mat[:, i].multiply(feat_mat[:, j])
            for i, j in self.mapper.values()
        ]
        return sps.hstack([base] + products).tocsr()
Example #27
Source File: operator_utils.py    From grove with Apache License 2.0 5 votes vote down vote up
def to_realimag(z):
    """
    Convert a complex hermitian matrix to a real valued doubled up representation, i.e., for
    ``Z = Z_r + 1j * Z_i`` return ``R(Z)``::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

    This representation is compatible with matrix products::

        R(X)*R(Y) = [ (X_r*Y_r-X_i*Y_i)    (X_r*Y_i + X_i*Y_r)]
                    [-(X_r*Y_i + X_i*Y_r)  (X_r*Y_r-X_i*Y_i)  ]

                  = R(X*Y).

    In particular, ``Z`` is complex positive (semi-)definite iff ``R(Z)`` is real positive
    (semi-)definite.

    :param (qutip.Qobj|scipy.sparse.base.spmatrix) z:  The operator representation matrix.
    :returns: R(Z) the doubled up representation.
    :rtype: scipy.sparse.csr_matrix
    :raises ValueError: if ``z`` is not hermitian.
    """
    if isinstance(z, qt.Qobj):
        z = z.data
    if not is_hermitian(z):  # pragma no coverage
        raise ValueError("Need a hermitian matrix z")
    real_part, imag_part = z.real, z.imag
    upper = sphstack([real_part, imag_part])
    # For hermitian z the imaginary part is antisymmetric, so its transpose
    # provides the -Z_i block.
    lower = sphstack([imag_part.T, real_part])
    return spvstack([upper, lower]).tocsr().real
Example #28
Source File: loader_nfm.py    From knowledge_graph_attention_network with MIT License 5 votes vote down vote up
def _extract_sp_info(self, sp_feats):
        """Split a sparse matrix into index, value and shape components.

        Returns ``(sp_indices, sp_values, sp_shape)`` where ``sp_indices`` is
        an ``(n, 2)`` array of ``[row, col]`` pairs taken from
        ``sp_feats.nonzero()``, ``sp_values`` is ``sp_feats.data`` and
        ``sp_shape`` is ``sp_feats.shape``.
        """
        # One nonzero() call yields both coordinate arrays; it used to be
        # evaluated twice.
        rows, cols = sp_feats.nonzero()
        # NOTE(review): nonzero() leaves out explicitly stored zeros while
        # .data keeps them, so indices and values only line up when the
        # matrix stores no explicit zeros - confirm for the callers.
        sp_indices = np.hstack((rows[:, None], cols[:, None]))
        sp_values = sp_feats.data
        sp_shape = sp_feats.shape
        return sp_indices, sp_values, sp_shape
Example #29
Source File: test_basic.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_hstack_vstack():
    """
    Checks the dtype produced by sparse.hstack and sparse.vstack (as opposed
    to the HStack and VStack classes that they wrap), both with and without
    an explicit target dtype.
    """

    def make_block(dtype):
        block_name = "%s block" % dtype
        return theano.sparse.csr_matrix(name=block_name, dtype=dtype)

    def get_expected_dtype(blocks, to_dtype):
        # An explicit target dtype wins; otherwise the block dtypes are upcast.
        if to_dtype is not None:
            return to_dtype
        return theano.scalar.upcast(*(b.dtype for b in blocks))

    # a deliberately weird mix of dtypes to stack
    dtypes = ('complex128', theano.config.floatX)
    blocks = [make_block(dtype) for dtype in dtypes]

    for stack_function in (theano.sparse.vstack, theano.sparse.hstack):
        for to_dtype in (None, ) + dtypes:
            stacked_blocks = stack_function(blocks, dtype=to_dtype)
            assert stacked_blocks.dtype == get_expected_dtype(blocks, to_dtype)
Example #30
Source File: feature.py    From text-classifier with Apache License 2.0 5 votes vote down vote up
def _add_feature(self, X, feature_to_add):
        """
        Returns sparse feature matrix with added feature.
        feature_to_add can also be a list of features.

        ``feature_to_add`` is converted with ``csr_matrix``. Input whose row
        count already matches ``X`` is stacked as is. Input that only fits
        after transposition - a flat list with one value per sample, or a
        list of such feature vectors - is transposed first, so each feature
        becomes one new column. The result is in CSR format.
        """
        from scipy.sparse import csr_matrix, hstack

        extra = csr_matrix(feature_to_add)
        n_rows = X.shape[0]
        # csr_matrix turns a flat list into a single row; flip it only when
        # the row count mismatches and the column count fits.
        if extra.shape[0] != n_rows and extra.shape[1] == n_rows:
            extra = extra.T
        return hstack([X, extra], format='csr')