Python numpy.int64() Examples
The following are 30 code examples of numpy.int64().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: skip_thoughts_model_test.py From DOTA_models with Apache License 2.0 | 6 votes |
def build_inputs(self):
    """Build model inputs, substituting random tensors for disk reads.

    In "encode" mode the parent implementation is used unchanged. In every
    other mode the id tensors are random int64 tensors of shape
    [batch_size, 15] drawn from [0, vocab_size), and each mask is a
    ones-tensor shaped like its id tensor.
    """
    if self.mode == "encode":
        # Encode mode doesn't read from disk, so defer to parent.
        return super(SkipThoughtsModel, self).build_inputs()

    def _random_ids():
        # Replace disk I/O with random Tensors.
        return tf.random_uniform(
            [self.config.batch_size, 15],
            minval=0,
            maxval=self.config.vocab_size,
            dtype=tf.int64)

    self.encode_ids = _random_ids()
    self.decode_pre_ids = _random_ids()
    self.decode_post_ids = _random_ids()

    self.encode_mask = tf.ones_like(self.encode_ids)
    self.decode_pre_mask = tf.ones_like(self.decode_pre_ids)
    self.decode_post_mask = tf.ones_like(self.decode_post_ids)
Example #2
Source File: group_sampler.py From mmdetection with Apache License 2.0 | 6 votes |
def __iter__(self):
    """Return an iterator over sample indices arranged in same-group batches.

    Each non-empty group is shuffled and padded (by resampling from itself)
    up to a multiple of ``samples_per_gpu``. The padded groups are then cut
    into chunks of ``samples_per_gpu`` and the chunks are visited in random
    order.
    """
    per_gpu = self.samples_per_gpu
    padded_groups = []
    for group_id, group_size in enumerate(self.group_sizes):
        if group_size == 0:
            continue
        members = np.where(self.flag == group_id)[0]
        assert len(members) == group_size
        np.random.shuffle(members)
        # Number of extra samples needed to fill the last chunk of the group.
        padded_len = int(np.ceil(group_size / per_gpu)) * per_gpu
        num_extra = padded_len - len(members)
        extras = np.random.choice(members, num_extra)
        padded_groups.append(np.concatenate([members, extras]))

    flat = np.concatenate(padded_groups)
    chunk_order = np.random.permutation(range(len(flat) // per_gpu))
    # Every group length is a multiple of per_gpu, so chunks never mix groups.
    chunks = [
        flat[k * per_gpu:(k + 1) * per_gpu]
        for k in chunk_order
    ]
    ordered = np.concatenate(chunks).astype(np.int64).tolist()
    assert len(ordered) == self.num_samples
    return iter(ordered)
Example #3
Source File: metric.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def update(self, labels, preds):
    """Accumulate the classification and localization metrics for one batch.

    ``preds`` supplies, in order, class probabilities, the localization
    loss and the class labels (each exposing ``asnumpy()``). ``labels`` is
    not used. Entries with a negative label are ignored.
    """
    # get generated multi label from network
    cls_prob, loc_loss, cls_label = (preds[k].asnumpy() for k in range(3))
    valid_count = np.sum(cls_label >= 0)

    # overall accuracy & object accuracy
    flat_label = cls_label.flatten()
    valid_pos = np.where(flat_label >= 0)[0]
    class_ids = np.int64(flat_label[valid_pos])
    # Move the class axis last so each row holds one anchor's probabilities.
    flat_prob = cls_prob.transpose((0, 2, 1)).reshape((-1, cls_prob.shape[1]))
    picked = flat_prob[valid_pos, class_ids]
    self.sum_metric[0] += (-np.log(picked + self.eps)).sum()
    self.num_inst[0] += valid_count

    # smoothl1loss
    self.sum_metric[1] += np.sum(loc_loss)
    self.num_inst[1] += valid_count
Example #4
Source File: preprocess.py From deep-siamese-text-similarity with MIT License | 6 votes |
def transform(self, raw_documents):
    """Transform documents to a word-id matrix, one row at a time.

    Words are converted to ids via ``self.vocabulary_.get``.

    Args:
      raw_documents: An iterable which yields either str or unicode.

    Yields:
      x: int64 array of length ``max_document_length`` holding the ids of
        the leading tokens of one document; unused trailing slots are 0.
    """
    for tokens in self._tokenizer(raw_documents):
        row = np.zeros(self.max_document_length, np.int64)
        for position, token in enumerate(tokens):
            if position < self.max_document_length:
                row[position] = self.vocabulary_.get(token)
                continue
            # Document is longer than the row; drop the remaining tokens.
            break
        yield row
Example #5
Source File: common.py From cat-bbs with MIT License | 6 votes |
def draw_heatmap(img, heatmap, alpha=0.5):
    """Draw a heatmap overlay over an image.

    ``heatmap`` must be a float32/float64 array of rank 2 (or rank 3 with a
    trailing axis of size 1) and ``img`` a uint8/int32/int64 array. The
    heatmap is resized (nearest neighbour) to the image's height/width when
    they differ, coloured with the 'jet' colormap and alpha-blended onto the
    image. Returns the blend as a uint8 array.
    """
    rank = len(heatmap.shape)
    assert rank == 2 or \
        (rank == 3 and heatmap.shape[2] == 1)
    assert img.dtype in [np.uint8, np.int32, np.int64]
    assert heatmap.dtype in [np.float32, np.float64]

    target_hw = img.shape[0:2]
    if target_hw != heatmap.shape[0:2]:
        # Resize in uint8 space, then scale back to the original float range.
        as_uint8 = np.clip(heatmap * 255, 0, 255).astype(np.uint8)
        resized = ia.imresize_single_image(
            as_uint8[..., np.newaxis],
            target_hw,
            interpolation="nearest"
        )
        heatmap = np.squeeze(resized) / 255.0

    colored = plt.get_cmap('jet')(heatmap)
    # Drop index 3 along axis 2 (presumably the colormap's alpha channel).
    colored = np.delete(colored, 3, 2)
    colored = colored * 255
    blended = (1-alpha) * img + alpha * colored
    return np.clip(blended, 0, 255).astype(np.uint8)
Example #6
Source File: test_selected_ci.py From pyscf with Apache License 2.0 | 6 votes |
def select_strs(myci, eri, eri_pq_max, civec_max, strs, norb, nelec):
    """Collect new bit strings reachable from ``strs`` by excitations.

    For every input string, single excitations i -> a are kept when
    ``eri_pq_max[a, i] * civec_max[address]`` exceeds ``myci.select_cutoff``;
    for a kept single with ``i < nelec`` and ``a >= nelec``, double
    excitations are added when ``abs(eri[a, i, b, j])`` times the same
    weight exceeds the cutoff.

    Returns a sorted int64 array of the new strings not already in ``strs``.
    """
    found = []
    for address, str0 in enumerate(strs):
        # Split the orbitals into occupied / virtual according to str0's bits.
        occ = [p for p in range(norb) if str0 & (1 << p)]
        vir = [p for p in range(norb) if not str0 & (1 << p)]
        weight = civec_max[address]
        for occ_pos, i in enumerate(occ):
            for vir_pos, a in enumerate(vir):
                if not eri_pq_max[a, i] * weight > myci.select_cutoff:
                    continue
                str1 = str0 ^ (1 << i) | (1 << a)
                found.append(str1)

                if not (i < nelec and a >= nelec):
                    continue
                # Second excitation j -> b on top of the accepted single.
                for j in occ[:occ_pos]:
                    for b in vir[vir_pos + 1:]:
                        if abs(eri[a, i, b, j]) * weight > myci.select_cutoff:
                            found.append(str1 ^ (1 << j) | (1 << b))
    fresh = sorted(set(found) - set(strs))
    return numpy.asarray(fresh, dtype=numpy.int64)
Example #7
Source File: instaboost.py From mmdetection with Apache License 2.0 | 6 votes |
def _parse_anns(self, results, anns, img):
    """Store boxes, labels, masks and the image from ``anns`` into ``results``.

    Each annotation's ``bbox`` is read as (x, y, w, h) and converted to
    (x1, y1, x2, y2). Annotations with non-positive width or height are
    skipped. Returns the same ``results`` dict, updated in place.
    """
    boxes, labels, masks = [], [], []
    for ann in anns:
        left, top, width, height = ann['bbox']
        # TODO: more essential bug need to be fixed in instaboost
        if not (width <= 0 or height <= 0):
            boxes.append([left, top, left + width, top + height])
            labels.append(ann['category_id'])
            masks.append(ann['segmentation'])

    box_array = np.array(boxes, dtype=np.float32)
    label_array = np.array(labels, dtype=np.int64)

    ann_info = results['ann_info']
    ann_info['labels'] = label_array
    ann_info['bboxes'] = box_array
    ann_info['masks'] = masks
    results['img'] = img
    return results
Example #8
Source File: cistring.py From pyscf with Apache License 2.0 | 6 votes |
def gen_cre_str_index_o1(orb_list, nelec):
    '''C implementation of gen_cre_str_index function

    Builds the strings for ``nelec`` electrons in ``orb_list`` and lets
    ``libfci.FCIcre_str_index`` fill an int32 table of shape
    (number of strings, norb - nelec, 4), which is returned.

    Raises NotImplementedError when the strings come back as an OIndexList.
    '''
    norb = len(orb_list)
    assert(nelec < norb)
    strs = make_strings(orb_list, nelec)
    if isinstance(strs, OIndexList):
        raise NotImplementedError('System with 64 orbitals or more')

    strs = numpy.array(strs, dtype=numpy.int64)
    na = strs.shape[0]
    # Output buffer, filled in place by the C routine.
    link_index = numpy.empty((len(strs), norb - nelec, 4), dtype=numpy.int32)
    out_ptr = link_index.ctypes.data_as(ctypes.c_void_p)
    strs_ptr = strs.ctypes.data_as(ctypes.c_void_p)
    libfci.FCIcre_str_index(out_ptr,
                            ctypes.c_int(norb), ctypes.c_int(na),
                            ctypes.c_int(nelec),
                            strs_ptr)
    return link_index
Example #9
Source File: custom_module.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def prepare_sparse_params(self, param_rowids):
    '''Prepares the module for processing a data batch by pulling row_sparse
    parameters from kvstore to all devices based on rowids.

    Does nothing when the module has no kvstore.

    Parameters
    ----------
    param_rowids : dict of str to NDArray of list of NDArrays
    '''
    if not self._kvstore:
        return
    assert(isinstance(param_rowids, dict))
    group = self._exec_group
    for param_name, rowids in param_rowids.items():
        rowid = rowids
        if isinstance(rowids, (tuple, list)):
            # Flatten every array to 1-D int64 and join them into one.
            flattened = [r.reshape((-1,)).astype(np.int64) for r in rowids]
            rowid = mx.nd.concat(*flattened, dim=0)
        param_idx = group.param_names.index(param_name)
        param_val = group.param_arrays[param_idx]
        self._kvstore.row_sparse_pull(param_name, param_val, row_ids=rowid,
                                      priority=-param_idx)
Example #10
Source File: custom_module.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_params_from_kv(self, arg_params, aux_params):
    """ Copy data from kvstore to `arg_params` and `aux_params`.

    Parameters
    ----------
    arg_params : list of NDArray
        Target parameter arrays.
    aux_params : list of NDArray
        Target aux arrays.

    Returns
    -------
    The ``(arg_params, aux_params)`` pair that was passed in.

    Raises
    ------
    NotImplementedError
        If ``aux_params`` is non-empty.

    Notes
    -----
    - This function will inplace update the NDArrays in arg_params and aux_params.
    """
    assert(self._kvstore is not None)
    names = self._exec_group.param_names
    arrays = self._exec_group.param_arrays
    for name, block in zip(names, arrays):
        assert(isinstance(block, list))
        first = block[0]
        if first.stype != 'row_sparse':
            assert(first.stype == 'default')
            self._kvstore.pull(name, out=arg_params[name])
            continue
        # Row-sparse parameter: request every row explicitly.
        row_ids = mx.nd.arange(start=0, stop=first.shape[0], dtype='int64')
        self._kvstore.row_sparse_pull(name, arg_params[name], row_ids=row_ids)
    if len(aux_params) > 0:
        raise NotImplementedError()
    return arg_params, aux_params
Example #11
Source File: cistring.py From pyscf with Apache License 2.0 | 6 votes |
def gen_des_str_index_o1(orb_list, nelec):
    '''C implementation of gen_des_str_index function

    Builds the strings for ``nelec`` electrons in ``orb_list`` and lets
    ``libfci.FCIdes_str_index`` fill an int32 table of shape
    (number of strings, nelec, 4), which is returned.

    Raises NotImplementedError when the strings come back as an OIndexList.
    '''
    assert(nelec > 0)
    strs = make_strings(orb_list, nelec)
    if isinstance(strs, OIndexList):
        raise NotImplementedError('System with 64 orbitals or more')

    strs = numpy.array(strs, dtype=numpy.int64)
    norb = len(orb_list)
    na = strs.shape[0]
    # Output buffer, filled in place by the C routine.
    link_index = numpy.empty((len(strs), nelec, 4), dtype=numpy.int32)
    out_ptr = link_index.ctypes.data_as(ctypes.c_void_p)
    strs_ptr = strs.ctypes.data_as(ctypes.c_void_p)
    libfci.FCIdes_str_index(out_ptr,
                            ctypes.c_int(norb), ctypes.c_int(na),
                            ctypes.c_int(nelec),
                            strs_ptr)
    return link_index
Example #12
Source File: metric.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def update(self, labels, preds):
    """Updates the internal evaluation result.

    For every (label, pred) pair, the predicted probability at each
    example's label index is selected and ``-log(prob + eps)`` is summed
    into ``sum_metric``; ``num_inst`` grows by the number of examples.

    Parameters
    ----------
    labels : list of `NDArray`
        The labels of the data.
    preds : list of `NDArray`
        Predicted values.
    """
    labels, preds = check_label_shapes(labels, preds, True)

    for label, pred in zip(labels, preds):
        label_np = label.asnumpy()
        pred_np = pred.asnumpy()

        flat_label = label_np.ravel()
        num_examples = pred_np.shape[0]
        assert flat_label.shape[0] == num_examples, (flat_label.shape[0], num_examples)
        # Pair each row index with that row's label to pick one entry per row.
        rows = numpy.arange(num_examples, dtype=numpy.int64)
        cols = numpy.int64(flat_label)
        prob = pred_np[rows, cols]
        self.sum_metric += (-numpy.log(prob + self.eps)).sum()
        self.num_inst += num_examples
Example #13
Source File: cistring.py From pyscf with Apache License 2.0 | 6 votes |
def gen_linkstr_index_o0(orb_list, nelec, strs=None):
    """Build the link table for every string with pure-Python loops.

    For each string the table lists one diagonal entry ``(i, i, address, 1)``
    per occupied orbital, followed by one entry
    ``(a, i, target_address, sign)`` per occupied/virtual pair, with the sign
    supplied by ``cre_des_sign``. When ``strs`` is None it is generated with
    ``make_strings``.

    Returns the table as an int32 array.
    """
    if strs is None:
        strs = make_strings(orb_list, nelec)
    # Map each string to its position in strs.
    address_of = {s: addr for addr, s in enumerate(strs)}

    def links_for(str0):
        occ = [p for p in orb_list if str0 & (1 << p)]
        vir = [p for p in orb_list if not str0 & (1 << p)]
        home = address_of[str0]
        table = [(i, i, home, 1) for i in occ]
        for i in occ:
            for a in vir:
                str1 = str0 ^ (1 << i) | (1 << a)
                # [cre, des, target_address, parity]
                table.append((a, i, address_of[str1], cre_des_sign(a, i, str0)))
        return table

    rows = []
    for s in strs.astype(numpy.int64):
        rows.append(links_for(s))
    return numpy.array(rows, dtype=numpy.int32)
Example #14
Source File: selected_ci.py From pyscf with Apache License 2.0 | 6 votes |
def gen_cre_linkstr(strs, norb, nelec):
    '''Given intermediates, the link table to generate input strs

    Returns None when ``nelec == norb``. Otherwise the intermediate strings
    are produced by ``libfci.SCIcre_uniq_strs``, de-duplicated and sorted,
    and ``libfci.SCIcre_linkstr`` fills an int32 table of shape
    (number of intermediates, nelec + 1, 4), which is returned.
    '''
    if nelec == norb:
        return None

    strs = numpy.asarray(strs, dtype=numpy.int64)
    nstrs = len(strs)
    nvir = norb - nelec
    # Scratch buffer sized for nvir candidate strings per input string.
    inter = numpy.empty((nstrs * nvir), dtype=numpy.int64)
    strs_ptr = strs.ctypes.data_as(ctypes.c_void_p)

    libfci.SCIcre_uniq_strs.restype = ctypes.c_int
    ninter = libfci.SCIcre_uniq_strs(inter.ctypes.data_as(ctypes.c_void_p),
                                     strs_ptr,
                                     ctypes.c_int(norb), ctypes.c_int(nelec),
                                     ctypes.c_int(nstrs))
    unique_sorted = sorted(set(inter[:ninter]))
    inter = numpy.asarray(unique_sorted, dtype=numpy.int64)
    ninter = len(inter)

    link_index = numpy.zeros((ninter, nelec + 1, 4), dtype=numpy.int32)
    libfci.SCIcre_linkstr(link_index.ctypes.data_as(ctypes.c_void_p),
                          ctypes.c_int(norb), ctypes.c_int(nelec),
                          ctypes.c_int(nstrs), ctypes.c_int(ninter),
                          strs.ctypes.data_as(ctypes.c_void_p),
                          inter.ctypes.data_as(ctypes.c_void_p))
    return link_index
Example #15
Source File: test_sparse_ndarray.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def test_create_row_sparse():
    """Check row_sparse_array construction for several densities.

    Also checks that shapes given as np.int32 / np.int64 values are handled,
    and that the call raises ValueError once the first shape entry has been
    incremented.
    """
    dim0 = 50
    dim1 = 50
    for density in [0, 0.5, 1]:
        shape = rand_shape_2d(dim0, dim1)
        matrix = rand_ndarray(shape, 'row_sparse', density)
        data, indices = matrix.data, matrix.indices
        rsp_created = mx.nd.sparse.row_sparse_array((data, indices), shape=shape)
        assert rsp_created.stype == 'row_sparse'
        assert same(rsp_created.data.asnumpy(), data.asnumpy())
        assert same(rsp_created.indices.asnumpy(), indices.asnumpy())
        rsp_copy = mx.nd.array(rsp_created)
        assert(same(rsp_copy.asnumpy(), rsp_created.asnumpy()))

        # add this test since we added np.int32 and np.int64 to integer_types
        if len(shape) != 2:
            continue
        for np_int_type in (np.int32, np.int64):
            # shape keeps the increment applied in the previous iteration.
            shape = [np_int_type(x) for x in list(shape)]
            arg1 = tuple(shape)
            mx.nd.sparse.row_sparse_array(arg1, tuple(shape))
            shape[0] += 1
            assert_exception(mx.nd.sparse.row_sparse_array, ValueError, arg1, tuple(shape))
Example #16
Source File: download_and_convert_mnist.py From DOTA_models with Apache License 2.0 | 6 votes |
def _extract_labels(filename, num_labels):
    """Extract the labels into a vector of int64 label IDs.

    The first 8 bytes of the gzip stream are skipped; each following byte is
    read as one label.

    Args:
      filename: The path to an MNIST labels file.
      num_labels: The number of labels in the file.

    Returns:
      A numpy array of shape [number_of_labels]
    """
    print('Extracting labels from: ', filename)
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        raw = bytestream.read(1 * num_labels)
    as_bytes = np.frombuffer(raw, dtype=np.uint8)
    return as_bytes.astype(np.int64)
Example #17
Source File: selected_ci.py From pyscf with Apache License 2.0 | 6 votes |
def select_strs(myci, eri, eri_pq_max, civec_max, strs, norb, nelec):
    """Collect new strings via ``libfci.SCIselect_strs``.

    The C routine writes candidate strings into a scratch buffer and returns
    how many it wrote. The candidates are de-duplicated, those already in
    ``strs`` are removed, and the rest is returned as a sorted int64 array.
    The cutoff passed to the C routine is ``myci.select_cutoff``.
    """
    def _ptr(array):
        return array.ctypes.data_as(ctypes.c_void_p)

    strs = numpy.asarray(strs, dtype=numpy.int64)
    nstrs = len(strs)
    nvir = norb - nelec
    # Scratch buffer for the candidates produced by the C routine.
    strs_add = numpy.empty((nstrs * (nelec * nvir) ** 2 // 4), dtype=numpy.int64)
    libfci.SCIselect_strs.restype = ctypes.c_int
    nadd = libfci.SCIselect_strs(_ptr(strs_add),
                                 _ptr(strs),
                                 _ptr(eri),
                                 _ptr(eri_pq_max),
                                 _ptr(civec_max),
                                 ctypes.c_double(myci.select_cutoff),
                                 ctypes.c_int(norb), ctypes.c_int(nelec),
                                 ctypes.c_int(nstrs))
    fresh = sorted(set(strs_add[:nadd]) - set(strs))
    return numpy.asarray(fresh, dtype=numpy.int64)
Example #18
Source File: ops_test.py From DOTA_models with Apache License 2.0 | 6 votes |
def test_indices_to_dense_vector_int(self):
    """indices_to_dense_vector with int64 dtype matches a NumPy reference.

    25 random indices out of 500 are set to 1; the op's output must equal
    the reference both in values and in dtype.
    """
    size = 500
    num_indices = 25
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    # NumPy reference: zeros everywhere except ones at the chosen indices.
    expected_output = np.zeros(size, dtype=np.int64)
    expected_output[rand_indices] = 1

    indicator = ops.indices_to_dense_vector(
        tf.constant(rand_indices), size, 1, dtype=tf.int64)

    with self.test_session() as sess:
        output = sess.run(indicator)
        self.assertAllEqual(output, expected_output)
        self.assertEqual(output.dtype, expected_output.dtype)
Example #19
Source File: wrappers.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def _convert_observ(self, observ):
    """Convert the observation to 32 bits.

    float64 becomes float32 and int64 becomes int32; any other dtype is
    returned unchanged.

    Args:
      observ: Numpy observation.

    Raises:
      ValueError: Observation contains infinite values.

    Returns:
      Numpy observation with 32-bit data type.
    """
    if not np.isfinite(observ).all():
        raise ValueError('Infinite observation encountered.')
    narrowing = ((np.float64, np.float32), (np.int64, np.int32))
    for wide, narrow in narrowing:
        if observ.dtype == wide:
            return observ.astype(narrow)
    return observ
Example #20
Source File: wrappers.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def _convert_observ(self, observ):
    """Convert the observation to 32 bits.

    float64 becomes float32 and int64 becomes int32; any other dtype is
    returned unchanged.

    Args:
      observ: Numpy observation.

    Raises:
      ValueError: Observation contains infinite values.

    Returns:
      Numpy observation with 32-bit data type.
    """
    if not np.isfinite(observ).all():
        raise ValueError('Infinite observation encountered.')
    narrowing = ((np.float64, np.float32), (np.int64, np.int32))
    for wide, narrow in narrowing:
        if observ.dtype == wide:
            return observ.astype(narrow)
    return observ
Example #21
Source File: spectral_graph_partition.py From LanczosNetwork with MIT License | 6 votes |
def get_L_cluster_cut(L, node_label):
    """Split a graph into within-cluster and between-cluster Laplacians.

    The off-diagonal non-zero pattern of ``L`` is taken as a binary
    adjacency matrix. Edges whose endpoints share a label in ``node_label``
    form the cluster graph; the remaining edges form the cut graph. Both are
    passed to ``get_laplacian`` with ``graph_laplacian_type='L4'``.

    Args:
      L: square matrix; only its off-diagonal non-zero pattern is used.
      node_label: array indexable by node id giving each node's label.

    Returns:
      (L_cluster, L_cut)
    """
    adj = L - np.diag(np.diag(L))
    adj[adj != 0] = 1.0
    num_nodes = adj.shape[0]
    idx_row, idx_col = np.meshgrid(range(num_nodes), range(num_nodes))
    idx_row = idx_row.flatten().astype(np.int64)
    idx_col = idx_col.flatten().astype(np.int64)
    # np.float was only an alias of the builtin float (i.e. float64) and was
    # removed in NumPy 1.24, so name the dtype explicitly.
    mask = (node_label[idx_row] == node_label[idx_col]).reshape(
        num_nodes, num_nodes).astype(np.float64)

    adj_cluster = adj * mask
    adj_cut = adj - adj_cluster
    L_cut = get_laplacian(adj_cut, graph_laplacian_type='L4')
    L_cluster = get_laplacian(adj_cluster, graph_laplacian_type='L4')
    return L_cluster, L_cut
Example #22
Source File: test_incremental_mean_and_var.py From differential-privacy-library with MIT License | 6 votes |
def test_inf_epsilon(self):
    """With infinite epsilon the DP result must match the sklearn result.

    Runs both implementations once with ``last_variance=None`` (both
    variances must be None) and once with ``last_variance=0.`` (variances
    must be close); means must be close and counts equal in both runs.
    """
    X = np.random.rand(5, 10)

    def zero_counts():
        return np.zeros(X.shape[1], dtype=np.int64)

    for last_variance in (None, 0.):
        dp_mean, dp_var, dp_count = _incremental_mean_and_var(
            X, epsilon=float("inf"), bounds=(0, 1), last_mean=0.,
            last_variance=last_variance, last_sample_count=zero_counts())
        sk_mean, sk_var, sk_count = sk_incremental_mean_and_var(
            X, last_mean=0., last_variance=last_variance,
            last_sample_count=zero_counts())

        self.assertTrue(np.allclose(dp_mean, sk_mean))
        if last_variance is None:
            self.assertIsNone(dp_var)
            self.assertIsNone(sk_var)
        else:
            self.assertTrue(np.allclose(dp_var, sk_var))
        self.assertTrue((dp_count == sk_count).all())
Example #23
Source File: data_reader_test.py From fine-lm with MIT License | 6 votes |
def testBasicExampleReading(self):
    """Read ten training examples and check field dtypes and non-emptiness."""
    dataset = self.problem.dataset(
        tf.estimator.ModeKeys.TRAIN,
        data_dir=self.data_dir,
        shuffle_files=False)
    examples = dataset.make_one_shot_iterator().get_next()
    with tf.train.MonitoredSession() as sess:
        # Check that there are multiple examples that have the right fields of the
        # right type (lists of int/float).
        for _ in range(10):
            ex_val = sess.run(examples)
            inputs = ex_val["inputs"]
            targets = ex_val["targets"]
            floats = ex_val["floats"]
            self.assertEqual(np.int64, inputs.dtype)
            self.assertEqual(np.int64, targets.dtype)
            self.assertEqual(np.float32, floats.dtype)
            for field in (inputs, targets, floats):
                self.assertGreater(len(field), 0)
Example #24
Source File: selection.py From pyshgp with MIT License | 6 votes |
def _select_with_stream(self, population: Population, cases: CaseStream) -> Individual:
    """Filter candidates case by case and return a random survivor.

    Starts from one individual per distinct error vector. For each case,
    only candidates whose error is within the tolerance of the best error on
    that case are kept; the loop stops early once at most one candidate is
    left. The tolerance is ``ep[case]`` for an ndarray epsilon and ``ep``
    for a scalar one. If ``self.epsilon`` is ``True`` the tolerance is
    first derived with ``self._epsilon_from_mad``.
    """
    candidates = one_individual_per_error_vector(population)

    ep = self.epsilon
    if isinstance(ep, bool) and ep:
        ep = self._epsilon_from_mad(population.all_error_vectors())

    for case in cases:
        if len(candidates) <= 1:
            break

        threshold = min(ind.error_vector[case] for ind in candidates)
        if isinstance(ep, np.ndarray):
            threshold = threshold + ep[case]
        elif isinstance(ep, (float, int, np.int64, np.float64)):
            threshold = threshold + ep

        survivors = []
        for ind in candidates:
            if ind.error_vector[case] <= threshold:
                survivors.append(ind)
        candidates = survivors
    return choice(candidates)
Example #25
Source File: trainLandmarks.py From pytorch-mri-segmentation-3D with MIT License | 6 votes |
def trainLandmarks(main_folder_path = main_folder_path, postfix = postfix):
    """Compute the mean mapped landmarks over all scans and write them out.

    For every folder matched by ``main_folder_path + 'scans/*'`` the FLAIR
    volume ``/pre/FLAIR<postfix>.nii.gz`` is loaded, its landmarks are
    extracted and mapped, and the mapped values are stored as one row. The
    column-wise mean (computed with dtype int64) is passed to
    ``NORM.writeHistInfo`` together with the module-level ``save_path``,
    ``pc``, ``s`` and ``m_p``.

    Args:
      main_folder_path: folder prefix that contains the ``scans`` directory.
      postfix: suffix inserted into the FLAIR file name.
    """
    scan_folders = glob.glob(main_folder_path + 'scans/*')
    FLAIR_path = '/pre/FLAIR' + postfix + '.nii.gz'

    m_arr = np.zeros([len(scan_folders), len(m_p)])
    for i, sf in enumerate(scan_folders):
        # Progress line rewritten in place. sys.stdout.write is used instead
        # of the Python 2 print statement so the function also parses on
        # Python 3; the emitted text is unchanged.
        sys.stdout.write(
            "Landmark training: {:4d}/{:4d}\r".format(i, len(scan_folders)))
        sys.stdout.flush()

        img_str = sf + FLAIR_path
        img_np = PP.numpyFromScan(img_str)
        p, m = NORM.getLandmarks(img_np)
        mapped_m = np.array([int(NORM.mapLandmarks(p, s, x)) for x in m],
                            dtype=np.int64)
        m_arr[i, :] = mapped_m

    mean_m = np.mean(m_arr, axis = 0, dtype=np.int64)
    NORM.writeHistInfo(save_path, pc, s, m_p, mean_m)
    #dwi.standardize.write_std_cfg(cfgpath, pc, landmarks, scale, mapped_scores,
    #                              thresholding)
Example #26
Source File: ops_test.py From object_detector_app with MIT License | 6 votes |
def test_indices_to_dense_vector_int(self):
    """indices_to_dense_vector with int64 dtype matches a NumPy reference.

    25 random indices out of 500 are set to 1; the op's output must equal
    the reference both in values and in dtype.
    """
    size = 500
    num_indices = 25
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    # NumPy reference: zeros everywhere except ones at the chosen indices.
    expected_output = np.zeros(size, dtype=np.int64)
    expected_output[rand_indices] = 1

    indicator = ops.indices_to_dense_vector(
        tf.constant(rand_indices), size, 1, dtype=tf.int64)

    with self.test_session() as sess:
        output = sess.run(indicator)
        self.assertAllEqual(output, expected_output)
        self.assertEqual(output.dtype, expected_output.dtype)
Example #27
Source File: DynamicTensionMap.py From Modeling-Cloth with MIT License | 6 votes |
def initalize(ob, key):
    '''Set up the indexing for viewing each edge per vert per face loop

    Fills ``data[ob.name]`` with:
      'ed_pairs_per_v' - for every (face, vertex) pair, the indices of that
                         face's edges which contain the vertex
      'zeros'          - a zero array with one 3-vector per such pair
      'edges'          - the result of ``get_edge_idx(ob)``
      'mags'           - length of every edge, measured on the coordinates
                         returned by ``get_key_coords(ob, key)``
      'mat_index'      - material index of every polygon
    and runs ``material_setup(ob)`` when no 'material' entry exists yet.
    '''
    obm = get_bmesh(ob)
    ed_pairs_per_v = []
    for f in obm.faces:
        for v in f.verts:
            # Named edge_ids rather than `set` to avoid shadowing the builtin.
            edge_ids = [e.index for e in f.edges if v in e.verts]
            ed_pairs_per_v.append(edge_ids)

    entry = data[ob.name]
    entry['ed_pairs_per_v'] = np.array(ed_pairs_per_v)
    pair_count = len(entry['ed_pairs_per_v'])
    entry['zeros'] = np.zeros(pair_count * 3).reshape(pair_count, 3)

    key_coords = get_key_coords(ob, key)
    # Query the edge indices once and reuse them (the original called
    # get_edge_idx twice and discarded the first result).
    edges = get_edge_idx(ob)
    #linked = np.array([len(i.link_faces) for i in obm.edges]) > 0
    entry['edges'] = edges  #[linked]
    dif = key_coords[edges[:, 0]] - key_coords[edges[:, 1]]
    # Row-wise dot product of dif with itself, then sqrt -> edge lengths.
    entry['mags'] = np.sqrt(np.einsum('ij,ij->i', dif, dif))

    mat_idx = np.zeros(len(ob.data.polygons), dtype=np.int64)
    ob.data.polygons.foreach_get('material_index', mat_idx)
    entry['mat_index'] = mat_idx
    if 'material' not in entry:
        print('ran this')
        material_setup(ob)
Example #28
Source File: SurfaceFollow.py From Modeling-Cloth with MIT License | 6 votes |
def triangulate(ob='empty', proxy=False):
    '''Requires a mesh. Returns an index array for viewing
    the coordinates as triangles. Store this!!! rather than recalculating
    every time. !!!Could use for_each_get with the mesh and polygons if
    all the faces have 3 points!!! Could also write bmesh to mesh and use
    foreach_get

    ob defaults to the active object (bpy.context.object). A truthy proxy
    makes the temporary mesh be built with modifiers applied. The
    temporary mesh is removed again before returning.
    Returns an int64 array of shape (triangle count, 3).'''
    if ob == 'empty':
        ob = bpy.context.object
    mods = True if proxy else False
    # From here on `proxy` names the temporary mesh, not the input flag.
    proxy = ob.to_mesh(bpy.context.scene, mods, 'PREVIEW')
    obm = get_bmesh(proxy)
    bmesh.ops.triangulate(obm, faces=obm.faces)
    obm.to_mesh(proxy)

    tri_count = len(proxy.polygons)
    flat_idx = np.zeros(tri_count * 3, dtype=np.int64)
    proxy.polygons.foreach_get('vertices', flat_idx)

    bpy.data.meshes.remove(proxy)
    obm.free()
    return flat_idx.reshape(tri_count, 3)
Example #29
Source File: corpus.py From TaskBot with GNU General Public License v3.0 | 6 votes |
def doc2mat(self, raw_documents):
    """Convert documents into a sparse document-term count matrix.

    Every document is tokenized, expanded with ``self._word_ngrams`` and
    mapped through ``self.token2id``; n-grams without an id are dropped.

    Args:
      raw_documents: sized sequence of raw documents (``len()`` is used for
        the number of rows).

    Returns:
      scipy.sparse.csr_matrix of dtype int64 and shape
      (len(raw_documents), self.size()); entry (r, c) is how often the
      n-gram with id c occurs in document r.
    """
    tokenizer = self._build_tokenizer()
    values = []
    col_index = []
    row_index = []
    for row, raw in enumerate(raw_documents):
        counts = defaultdict(int)
        # A distinct loop variable keeps the row index intact (the original
        # comprehension reused `i`, which clobbers the row index on Python 2)
        # and each n-gram is looked up only once.
        for gram in self._word_ngrams(tokenizer(raw)):
            token_id = self.token2id.get(gram)
            if token_id is not None:
                counts[token_id] += 1
        values.extend(counts.values())
        row_index.extend([row] * len(counts))
        col_index.extend(counts.keys())
    return scipy.sparse.csr_matrix(
        (values, (row_index, col_index)),
        shape=(len(raw_documents), self.size()),
        dtype=np.int64
    )
Example #30
Source File: corpus.py From TaskBot with GNU General Public License v3.0 | 6 votes |
def doc2mat(self, raw_documents):
    """Convert documents into a sparse document-term count matrix.

    Every document is tokenized, expanded with ``self._word_ngrams`` and
    mapped through ``self.token2id``; n-grams without an id are dropped.

    Args:
      raw_documents: sized sequence of raw documents (``len()`` is used for
        the number of rows).

    Returns:
      scipy.sparse.csr_matrix of dtype int64 and shape
      (len(raw_documents), self.size()); entry (r, c) is how often the
      n-gram with id c occurs in document r.
    """
    tokenizer = self._build_tokenizer()
    values = []
    col_index = []
    row_index = []
    for row, raw in enumerate(raw_documents):
        counts = defaultdict(int)
        # A distinct loop variable keeps the row index intact (the original
        # comprehension reused `i`, which clobbers the row index on Python 2)
        # and each n-gram is looked up only once.
        for gram in self._word_ngrams(tokenizer(raw)):
            token_id = self.token2id.get(gram)
            if token_id is not None:
                counts[token_id] += 1
        values.extend(counts.values())
        row_index.extend([row] * len(counts))
        col_index.extend(counts.keys())
    return scipy.sparse.csr_matrix(
        (values, (row_index, col_index)),
        shape=(len(raw_documents), self.size()),
        dtype=np.int64
    )