Python scipy.sparse.save_npz() Examples

The following are 16 code examples of scipy.sparse.save_npz(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all available functions and classes of the scipy.sparse module, or try the search function.
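As a quick orientation, here is a minimal save/load round trip (a sketch; the /tmp path is illustrative). save_npz() compresses by default and appends the .npz extension if the file name does not already end with it.

import numpy as np
import scipy.sparse as sp

# Build a small CSR matrix and round-trip it through save_npz / load_npz.
mat = sp.csr_matrix(np.eye(3))
sp.save_npz('/tmp/example_matrix.npz', mat)

loaded = sp.load_npz('/tmp/example_matrix.npz')
assert (mat != loaded).nnz == 0  # no entries differ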
Example #1
Source File: base_attack.py    From DeepRobust with MIT License    6 votes
def save_adj(self, root=r'/tmp/', name='mod_adj'):
        """Save attacked adjacency matrix.

        Parameters
        ----------
        root :
            root directory where the variable should be saved
        name : str
            saved file name

        Returns
        -------
        None.

        """
        assert self.modified_adj is not None, \
                'modified_adj is None! Please perturb the graph first.'
        name = name + '.npz'
        modified_adj = self.modified_adj

        if type(modified_adj) is torch.Tensor:
            sparse_adj = utils.to_scipy(modified_adj)
            sp.save_npz(osp.join(root, name), sparse_adj)
        else:
            sp.save_npz(osp.join(root, name), modified_adj) 
Example #2
Source File: reddit_fit_topics.py    From causal-text-embeddings with MIT License    5 votes
def load_term_counts(reddit, path='../dat/reddit/', force_redo=False):
	count_filename = path  + 'term_counts'
	vocab_filename = path + 'vocab'

	if os.path.exists(count_filename + '.npz') and not force_redo:
		return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')

	post_docs = reddit['post_text'].values
	counts, vocab, _ = tokenize_documents(post_docs)    
	sparse.save_npz(count_filename, counts)
	np.save(vocab_filename, vocab)
	return counts, np.array(vocab) 
Example #3
Source File: dump_tfidf.py    From denspi with Apache License 2.0    5 votes
def main():
    args = get_args()
    if args.nfs:
        from nsml import NSML_NFS_OUTPUT
        args.dump_dir = os.path.join(NSML_NFS_OUTPUT, args.dump_dir)
        args.out_dir = os.path.join(NSML_NFS_OUTPUT, args.out_dir)
        args.ranker_path = os.path.join(NSML_NFS_OUTPUT, args.ranker_path)
    args.ranker_path = os.path.join(args.ranker_path, 'docs-tfidf-ngram=2-hash=16777216-tokenizer=simple.npz')
    os.makedirs(args.out_dir)
    assert os.path.isdir(args.dump_dir)
    dump_paths = sorted([os.path.join(args.dump_dir, name) for name in os.listdir(args.dump_dir) if 'hdf5' in name])[
                 args.start:args.end]
    print(dump_paths)
    dump_names = [os.path.splitext(os.path.basename(path))[0] for path in dump_paths]
    dump_ranges = [list(map(int, name.split('-'))) for name in dump_names]
    phrase_dumps = [h5py.File(path, 'r') for path in dump_paths]

    ranker = None
    ranker = MyTfidfDocRanker(
        tfidf_path=args.ranker_path,
        strict=False
    )

    print('Ranker shape {} from {}'.format(ranker.doc_mat.shape, args.ranker_path))
    # new_mat = ranker.doc_mat.T.tocsr()
    # sp.save_npz('doc_tfidf.npz', new_mat)
    dump_tfidf(ranker, phrase_dumps, dump_names, args) 
Example #4
Source File: wikidatagraph.py    From opentapioca with Apache License 2.0    5 votes
def save_matrix(self, fname):
        sparse.save_npz(fname, self.mat) 
Example #5
Source File: load_data.py    From neural_graph_collaborative_filtering with MIT License    5 votes
def get_adj_mat(self):
        try:
            t1 = time()
            adj_mat = sp.load_npz(self.path + '/s_adj_mat.npz')
            norm_adj_mat = sp.load_npz(self.path + '/s_norm_adj_mat.npz')
            mean_adj_mat = sp.load_npz(self.path + '/s_mean_adj_mat.npz')
            print('already load adj matrix', adj_mat.shape, time() - t1)

        except Exception:
            adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
            sp.save_npz(self.path + '/s_adj_mat.npz', adj_mat)
            sp.save_npz(self.path + '/s_norm_adj_mat.npz', norm_adj_mat)
            sp.save_npz(self.path + '/s_mean_adj_mat.npz', mean_adj_mat)
        return adj_mat, norm_adj_mat, mean_adj_mat 
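Several of these examples (this one, and the term-count and feature-matrix loaders above and below) follow the same load-or-rebuild caching pattern: try to load a cached .npz file, and if that fails, build the matrix and persist it with save_npz(). A stripped-down sketch of the pattern, with load_or_build, cache_path, and build_fn as illustrative names not taken from any of the projects:

import os
import scipy.sparse as sp

def load_or_build(cache_path, build_fn, force_redo=False):
    """Load a sparse matrix from cache_path, rebuilding and caching it if needed."""
    if os.path.exists(cache_path) and not force_redo:
        return sp.load_npz(cache_path)
    mat = build_fn()                # build_fn returns a scipy.sparse matrix
    sp.save_npz(cache_path, mat)    # cache it for the next run
    return mat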
Example #6
Source File: loader_nfm.py    From knowledge_graph_attention_network with MIT License    5 votes
def get_kg_feature(self, kg_feat_file):
        try:
            kg_feat_mat = sp.load_npz(kg_feat_file)
            print('already load item kg feature mat', kg_feat_mat.shape)
        except Exception:
            kg_feat_mat = self._create_kg_feat_mat()
            sp.save_npz(kg_feat_file, kg_feat_mat)
            print('already save item kg feature mat:', kg_feat_file)
        return kg_feat_mat 
Example #7
Source File: make-trie.py    From isdi with MIT License    5 votes
def join_mats(fnames, s, e):
    ofname="mat_{}_{}".format(s, e)
    print(ofname, fnames)
    M = [sps.load_npz(f) for f in fnames]
    print("Done reading..")
    sps.save_npz(
        ofname,
        sps.vstack(M)
    ) 
Example #8
Source File: make-trie.py    From isdi with MIT License    5 votes
def join_smart_mat(fnames):
    """Join arrays in Mlist inplace"""
    # M.indptr M.indices
    indptr = np.zeros(num_devices+1, dtype=np.int32)
    indices = np.zeros(Msize, dtype=np.int32)    
    i_indptr, i_indices = 0, 0
    ofname = 'joined_mat.npz'
    M = [None for _ in fnames]
    for i, mf in enumerate(fnames) :
        M[i] = sps.load_npz(mf)
        print("Loaded matrix={}. shape={}. nnz={}".format(mf, M[i].shape, M[i].nnz))
        # Mindptr = M.indptr
        # Mindices = M.indices
        # indptr[i_indptr+1:i_indptr+len(Mindptr)] = Mindptr[1:] + indptr[i_indptr]
        # i_indptr += len(Mindptr)-1
        # indices[i_indices:i_indices+len(Mindices)] = Mindices
        # i_indices += i_indices
        # del M
    print("Saving the file...")
    M = sps.csr_matrix(
        (np.ones(len(indices)), indices, indptr),
        shape=(len(indptr)-1, num_apps),
        dtype=bool
    )
    print(M.nnz)
    sps.save_npz(ofname, M) 
Example #9
Source File: make-trie.py    From isdi with MIT License    5 votes
def create_matrix(mf, mfname, ofname_cnt):
    indptr = np.zeros(LIM+1, dtype=np.int32)
    indices = array.array('I')
    ofname = mfname.rsplit('.', 2)[0] + '.csr_matrix'.format(ofname_cnt)
    j = 0
    for j, d in enumerate(mf):
        if j>LIM: break
        terms = d.decode('utf-8').strip().split(',')
        if len(terms)<1: continue
        i, terms = int(terms[0]), terms[1:]
        indices.extend([_get(t) for t in terms])
        indptr[j%LIM+1] = len(indices)
        if j % 10000 == 0:
            print("Done {}".format(j))
 
    # print("Saving: j={} start: {} stop: {}".format(j, start, stop))
    if j>0:
        print("Saving... {}".format(ofname))
        if len(indptr) > j:
            indptr = indptr[:j+2]
        print(len(indices), indptr)
         
        M = sps.csr_matrix(
            (np.ones(len(indices)), indices, indptr),
            shape=(len(indptr)-1, num_apps),
            dtype=bool
        )
        print(M.nnz)
        sps.save_npz(ofname, M)
        create_matrix(mf, mfname, ofname_cnt+1) 
Example #10
Source File: peerread_fit_topics.py    From causal-text-embeddings with MIT License    5 votes
def load_term_counts(df, path='../dat/PeerRead/', force_redo=False, text_col='abstract_text'):
	count_filename = path  + 'term_counts'
	vocab_filename = path + 'vocab'

	if os.path.exists(count_filename + '.npz') and not force_redo:
		return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')

	post_docs = df[text_col].values
	counts, vocab, _ = tokenize_documents(post_docs)    
	sparse.save_npz(count_filename, counts)
	np.save(vocab_filename, vocab)
	return counts, np.array(vocab) 
Example #11
Source File: hashing.py    From deep_architect with MIT License    5 votes
def save_state(self, folderpath):
        state = {
            'num_evals': len(self.vecs_lst),
            'vals_lst': self.vals_lst,
        }
        ut.write_jsonfile(state,
                          ut.join_paths([folderpath, 'hash_model_state.json']))
        for i, vecs in enumerate(self.vecs_lst):
            sp.save_npz(ut.join_paths([folderpath, str(i) + '.npz']), vecs)

    # TODO: improve 
Example #12
Source File: reddit_posts.py    From causal-text-embeddings with MIT License    5 votes
def load_term_counts(path='../dat/', force_redo=False):
    count_filename = path  + 'reddit_term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')
    
    reddit = load_reddit()
    post_docs = reddit['post_text'].values
    counts, vocab = tokenize_documents(post_docs)
    sparse.save_npz(path + 'reddit_term_counts', counts)
    np.save(path + 'vocab', vocab)
    return counts, vocab 
Example #13
Source File: peerread_output_att.py    From causal-text-embeddings with MIT License    5 votes
def load_term_counts(df, path='../dat/PeerRead/', force_redo=False, text_col='abstract_text'):
	count_filename = path  + 'term_counts'
	vocab_filename = path + 'vocab'

	if os.path.exists(count_filename + '.npz') and not force_redo:
		return sparse.load_npz(count_filename + '.npz').toarray(), np.load(vocab_filename + '.npy')

	post_docs = df[text_col].values
	counts, vocab, _ = tokenize_documents(post_docs)    
	sparse.save_npz(count_filename, counts)
	np.save(vocab_filename, vocab)
	return counts.toarray(), np.array(vocab) 
Example #14
Source File: reddit_output_att.py    From causal-text-embeddings with MIT License    5 votes
def load_term_counts(reddit, path='../dat/reddit/', force_redo=False):
	count_filename = path  + 'term_counts'
	vocab_filename = path + 'vocab'

	if os.path.exists(count_filename + '.npz') and not force_redo:
		return sparse.load_npz(count_filename + '.npz').toarray(), np.load(vocab_filename + '.npy')

	post_docs = reddit['post_text'].values
	counts, vocab, _ = tokenize_documents(post_docs)    
	sparse.save_npz(count_filename, counts)
	np.save(vocab_filename, vocab)
	return counts.toarray(), np.array(vocab) 
Example #15
Source File: base_attack.py    From DeepRobust with MIT License    5 votes
def save_features(self, root=r'/tmp/', name='mod_features'):
        """Save attacked node feature matrix.

        Parameters
        ----------
        root :
            root directory where the variable should be saved
        name : str
            saved file name

        Returns
        -------
        None.

        """

        assert self.modified_features is not None, \
                'modified_features is None! Please perturb the graph first.'
        name = name + '.npz'
        modified_features = self.modified_features

        if type(modified_features) is torch.Tensor:
            sparse_features = utils.to_scipy(modified_features)
            sp.save_npz(osp.join(root, name), sparse_features)
        else:
            sp.save_npz(osp.join(root, name), modified_features) 
Example #16
Source File: libsvm.py    From celer with BSD 3-Clause "New" or "Revised" License    4 votes
def get_X_y(dataset, compressed_path, multilabel, replace=False):
    """Load a LIBSVM dataset as sparse X and observation y/Y.
    If X and y already exists as npz and npy, they are not redownloaded unless
    replace=True."""

    ext = '.npz' if multilabel else '.npy'
    y_path = pjoin(CELER_PATH, "%s_target%s" % (NAMES[dataset], ext))
    X_path = pjoin(CELER_PATH, "%s_data.npz" % NAMES[dataset])
    if replace or not os.path.isfile(y_path) or not os.path.isfile(X_path):
        tmp_path = pjoin(CELER_PATH, "%s" % NAMES[dataset])

        decompressor = BZ2Decompressor()
        print("Decompressing...")
        with open(tmp_path, "wb") as f, open(compressed_path, "rb") as g:
            for data in iter(lambda: g.read(100 * 1024), b''):
                f.write(decompressor.decompress(data))

        n_features_total = N_FEATURES[dataset]
        print("Loading svmlight file...")
        with open(tmp_path, 'rb') as f:
            X, y = load_svmlight_file(
                f, n_features_total, multilabel=multilabel)

        os.remove(tmp_path)
        X = sparse.csc_matrix(X)
        X.sort_indices()
        sparse.save_npz(X_path, X)

        if multilabel:
            indices = np.array([lab for labels in y for lab in labels])
            indptr = np.cumsum([0] + [len(labels) for labels in y])
            data = np.ones_like(indices)
            Y = sparse.csr_matrix((data, indices, indptr))
            sparse.save_npz(y_path, Y)
            return X, Y

        else:
            np.save(y_path, y)

    else:
        X = sparse.load_npz(X_path)
        y = np.load(y_path)

    return X, y