Python numpy.unique() Examples
The following are 30 code examples of numpy.unique().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: dataloader_m.py From models with MIT License | 7 votes |
def prepro_pos_table(pos_tables):
    """Merge position tables into one table of unique, sorted positions.

    `pos_tables` is either a single table or a list of tables; each table is
    indexed by the columns `chromo` and `pos`. The result has exactly those
    two columns, holds every distinct position once per chromosome, and is
    ordered by chromosome and then by position.
    """
    tables = pos_tables if isinstance(pos_tables, list) else [pos_tables]

    merged = None
    for table in tables:
        merged = table if merged is None else pd.concat([merged, table])

    def _unique_positions(group):
        # np.unique both de-duplicates and sorts the positions of one group.
        return pd.DataFrame({'pos': np.unique(group['pos'])})

    merged = merged.groupby('chromo').apply(_unique_positions)
    merged.reset_index(inplace=True)
    merged = merged[['chromo', 'pos']]
    merged.sort_values(['chromo', 'pos'], inplace=True)
    return merged
Example #2
Source File: MEDA.py From transferlearning with MIT License | 6 votes |
def estimate_mu(self, _X1, _Y1, _X2, _Y2):
    """Estimate the factor mu from proxy A-distances.

    One distance is computed between the full sets `_X1` and `_X2`, and one
    per class between the rows of `_X1` and `_X2` carrying that class label.
    mu is the mean per-class distance divided by the sum of that mean and the
    full-set distance. Values above 1 become 1; values below 1e-3 become 0.
    """
    marginal_dist = proxy_a_distance(_X1, _X2)
    n_classes = len(np.unique(_Y1))
    threshold = 1e-3

    # Class labels are looked up as the integers 1..n_classes.
    per_class_dists = []
    for label in range(1, n_classes + 1):
        rows_1 = np.where(_Y1 == label)[0]
        rows_2 = np.where(_Y2 == label)[0]
        per_class_dists.append(proxy_a_distance(_X1[rows_1, :], _X2[rows_2, :]))

    conditional_dist = sum(per_class_dists) / n_classes
    mu = conditional_dist / (conditional_dist + marginal_dist)
    if mu > 1:
        mu = 1
    if mu < threshold:
        mu = 0
    return mu
Example #3
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def fit(self, X, y):
    """Fit the wrapped model and rebuild the per-node pos/neg counters.

    Returns `self` in every case. With no rows nothing happens; with at most
    one distinct value in `y` only `self.update_aux(y)` is called.
    """
    if X.shape[0] == 0:
        return self
    if np.unique(y).shape[0] <= 1:
        self.update_aux(y)
        return self

    # Draw a fresh seed for the model from this object's own generator.
    self.model.set_params(
        random_state=self.random_state.integers(np.iinfo(np.int32).max))
    self.model.fit(X, y)

    node_total = self.model.tree_.node_count
    self.pos = np.zeros(node_total, dtype=ctypes.c_long)
    self.neg = np.zeros(node_total, dtype=ctypes.c_long)
    node_of_row = self.model.apply(X).astype(ctypes.c_long)
    # NOTE(review): presumably fills self.pos / self.neg in place with
    # per-node tallies of the two outcomes -- confirm against the helper.
    _create_node_counters(self.pos, self.neg, node_of_row,
                          y.astype(ctypes.c_double))

    # Convert to floating point and add the prior to each counter.
    self.pos = self.pos.astype(ctypes.c_double) + self.beta_prior[0]
    self.neg = self.neg.astype(ctypes.c_double) + self.beta_prior[1]

    self.is_fitted = True
    return self
Example #4
Source File: test_util.py From libTLDA with MIT License | 6 votes |
def test_one_hot():
    """Check that one_hot yields a 0/1 matrix with matching labels and shape."""
    # Label vector: ten samples for each of the classes 0, 1 and 2.
    y = np.hstack([np.ones((10,)) * k for k in (0, 1, 2)])

    # Map the label vector to a matrix.
    Y, labels = one_hot(y)

    # The matrix may contain nothing but zeros and ones.
    stray_values = np.setdiff1d(np.unique(Y), [0, 1])
    assert len(stray_values) == 0

    # The reported labels are exactly the distinct values of y.
    assert np.all(labels == np.unique(y))

    # One row per sample, one column per label.
    n_rows, n_cols = Y.shape[0], Y.shape[1]
    assert n_rows == y.shape[0]
    assert n_cols == len(labels)
Example #5
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def fit(self, X, y, *args, **kwargs):
    """Fit the wrapped model and rebuild `self.Sigma` from its predictions.

    Parameters
    ----------
    X : array-like with a `.shape`
        Covariates; nothing is done when it has zero rows.
    y : array-like
        Labels; nothing is done when they hold at most one distinct value.
    *args, **kwargs
        Accepted for signature compatibility and ignored.

    Returns
    -------
    self
        Returned on every path. The success path previously fell through and
        returned None while the early exits returned `self`.
    """
    if X.shape[0] == 0:
        return self
    elif np.unique(y).shape[0] <= 1:
        return self

    self.model.fit(X, y)

    # p * (1 - p) for the second column of predict_proba.
    var = self.model.predict_proba(X)[:, 1]
    var = var * (1 - var)

    n = X.shape[1]
    # One extra row/column is reserved when an intercept is fitted.
    self.Sigma = np.zeros((n + self.fit_intercept, n + self.fit_intercept),
                          dtype=ctypes.c_double)
    X, Xcsr = self._process_X(X)
    # NOTE(review): presumably accumulates the weighted matrix into
    # self.Sigma in place (overwrite=1) -- confirm against the extension.
    _wrapper_double.update_matrices_noinv(
        X,
        np.empty(0, dtype=ctypes.c_double),
        var,
        self.Sigma,
        np.empty(0, dtype=ctypes.c_double),
        Xcsr=Xcsr,
        add_bias=self.fit_intercept,
        overwrite=1
    )
    _matrix_inv_symm(self.Sigma, self.lambda_)
    self.is_fitted = True
    return self
Example #6
Source File: metrics.py From DDPAE-video-prediction with MIT License | 6 votes |
def find_match(self, pred, gt):
    '''
    Match each predicted component to its nearest ground-truth component.

    `pred` has shape (batch, frames, components, 2) and `gt` is reshaped to
    the same layout. The distance is the squared difference summed over the
    last axis and over frames. Returns `(indices, ambiguous)`: `indices[b, k]`
    is the ground-truth component closest to prediction k, and `ambiguous[b]`
    is 1 when two predictions of sample b share a match, else 0.
    '''
    batch_size, n_frames_input, n_components, _ = pred.shape
    pred_cols = pred.reshape(batch_size, n_frames_input, n_components, 1, 2)
    gt_rows = gt.reshape(batch_size, n_frames_input, 1, n_components, 2)

    # Pairwise squared distance, reduced over coordinates and then frames.
    sq_dist = ((pred_cols - gt_rows) ** 2).sum(axis=-1).sum(axis=1)

    # Direct indices
    indices = np.argmin(sq_dist, axis=2)

    ambiguous = np.zeros(batch_size, dtype=np.int8)
    for b, matches in enumerate(indices):
        _, counts = np.unique(matches, return_counts=True)
        if np.any(counts != 1):
            ambiguous[b] = 1
    return indices, ambiguous
Example #7
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def _partial_fit_single(self, choice, X, a, r):
    """Update the model of one arm with a batch of observations.

    The rows and labels for `choice` come from `self._filter_arm_data`. When
    a buffer is configured it decides whether a full refit is due and
    supplies the batch to train on.
    """
    yclass, this_choice = self._filter_arm_data(X, a, r, choice)
    if self.smooth is not None:
        self.counters[0, choice] += yclass.shape[0]
    xclass = X[this_choice, :]

    do_full_refit = False
    if self.buffer is not None:
        arm_buffer = self.buffer[choice]
        do_full_refit = arm_buffer.do_full_refit()
        xclass, yclass = arm_buffer.get_batch(xclass, yclass)

    if (xclass.shape[0] > 0) or self.force_fit:
        # A full refit is only attempted when both classes are present.
        has_two_classes = np.unique(yclass).shape[0] >= 2
        if do_full_refit and has_two_classes:
            self.algos[choice].fit(xclass, yclass)
        else:
            self.algos[choice].partial_fit(xclass, yclass, classes = [0, 1])

    ## update the beta counters if needed
    if self.force_counters:
        self._update_beta_counters(yclass, choice)
Example #8
Source File: online.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def _add_choices(self, nchoices): if isinstance(nchoices, int): self.nchoices = nchoices self.choice_names = None elif isinstance(nchoices, list) or nchoices.__class__.__name__ == "Series" or nchoices.__class__.__name__ == "DataFrame": self.choice_names = np.array(nchoices).reshape(-1) self.nchoices = self.choice_names.shape[0] if np.unique(self.choice_names).shape[0] != self.choice_names.shape[0]: raise ValueError("Arm/choice names contain duplicates.") elif isinstance(nchoices, np.ndarray): self.choice_names = nchoices.reshape(-1) self.nchoices = self.choice_names.shape[0] if np.unique(self.choice_names).shape[0] != self.choice_names.shape[0]: raise ValueError("Arm/choice names contain duplicates.") else: raise ValueError("'nchoices' must be an integer or list with named arms.")
Example #9
Source File: KernelRidgeRegression.py From fuku-ml with MIT License | 6 votes |
def init_W(self, mode='normal'):
    """Create a zero weight vector for every pair of class labels.

    Requires `self.status` to be 'load_train_data' or 'train'; otherwise a
    message is printed and an empty dict is returned. On success the status
    becomes 'init' and `self.W` maps each 2-combination of the distinct
    values in `self.train_Y` to a zero vector with one entry per feature.
    `mode` is accepted but not used.
    """
    self.W = {}

    if self.status not in ('load_train_data', 'train'):
        print("Please load train data first.")
        return self.W

    self.status = 'init'
    self.data_num = len(self.train_Y)
    self.data_demension = len(self.train_X[0])

    distinct_labels = np.unique(self.train_Y)
    self.class_list = list(itertools.combinations(distinct_labels, 2))
    for label_pair in self.class_list:
        self.W[label_pair] = np.zeros(self.data_demension)
    return self.W
Example #10
Source File: LinearRegression.py From fuku-ml with MIT License | 6 votes |
def init_W(self, mode='normal'):
    """Create a zero weight vector for every pair of class labels.

    Requires `self.status` to be 'load_train_data' or 'train'; otherwise a
    message is printed and an empty dict is returned. On success the status
    becomes 'init' and `self.W` maps each 2-combination of the distinct
    values in `self.train_Y` to a zero vector with one entry per feature.
    `mode` is accepted but not used.
    """
    self.W = {}

    if self.status not in ('load_train_data', 'train'):
        print("Please load train data first.")
        return self.W

    self.status = 'init'
    self.data_num = len(self.train_Y)
    self.data_demension = len(self.train_X[0])

    distinct_labels = np.unique(self.train_Y)
    self.class_list = list(itertools.combinations(distinct_labels, 2))
    for label_pair in self.class_list:
        self.W[label_pair] = np.zeros(self.data_demension)
    return self.W
Example #11
Source File: dataset.py From neural-combinatorial-optimization-rl-tensorflow with MIT License | 6 votes |
def visualize_sampling(self, permutations):
    """Display a heatmap of which city is chosen at which step.

    Row t of the heatmap accumulates, over all `permutations`, how often each
    city appears at step t. The figure is shown with `plt.show()`; nothing is
    returned.
    """
    n_steps = len(permutations[0])
    heat = np.zeros([n_steps, n_steps])  # heatmap grid, initially all zero

    # After transposing, entry t holds the cities chosen at step t.
    for step, chosen in enumerate(np.transpose(permutations)):
        cities, hits = np.unique(chosen, return_counts=True, axis=0)
        for city, n_hits in zip(cities, hits):
            heat[step][city] += n_hits  # add this batch's counts to the grid

    # plot heatmap
    fig = plt.figure()
    rcParams.update({'font.size': 22})
    axes = fig.add_subplot(1, 1, 1)
    axes.set_aspect('equal')
    plt.imshow(heat, interpolation='nearest', cmap='gray')
    plt.colorbar()
    plt.title('Sampled permutations')
    plt.ylabel('Time t')
    plt.xlabel('City i')
    plt.show()
Example #12
Source File: decision_tree.py From discomll with Apache License 2.0 | 6 votes |
def rand_indices(x, rand_attr):
    """
    Randomly select features without replacement, skipping constant ones.

    Used with random forest. Every selected feature has more than one
    distinct value in `x`.

    x: numpy array - dataset (rows are samples, columns are features)
    rand_attr: int - number of features to select

    Returns a list of `rand_attr` column indices, or -1 when too few
    non-constant features remain to draw `rand_attr` of them.
    """
    # A real list is required: candidates are removed in place below, and a
    # Python 3 `range` object has no `remove` method.
    indices = list(range(len(x[0])))

    while True:
        # randomly selected features without replacement
        rand_list = random.sample(indices, rand_attr)
        for i in rand_list:
            if len(np.unique(x[:, i])) == 1:
                # Constant feature: drop it from the pool and draw again.
                indices.remove(i)
                if len(indices) == rand_attr - 1:
                    return -1  # not enough non-constant features left
                break
        else:
            # No constant feature in this draw.
            return rand_list
Example #13
Source File: SupportVectorMachine.py From fuku-ml with MIT License | 6 votes |
def init_W(self, mode='normal'):
    """Create a zero weight vector for every pair of class labels.

    Requires `self.status` to be 'load_train_data' or 'train'; otherwise a
    message is printed and an empty dict is returned. On success the status
    becomes 'init' and `self.W` maps each 2-combination of the distinct
    values in `self.train_Y` to a zero vector with one entry per feature.
    `mode` is accepted but not used.
    """
    self.W = {}

    if self.status not in ('load_train_data', 'train'):
        print("Please load train data first.")
        return self.W

    self.status = 'init'
    self.data_num = len(self.train_Y)
    self.data_demension = len(self.train_X[0])

    distinct_labels = np.unique(self.train_Y)
    self.class_list = list(itertools.combinations(distinct_labels, 2))
    for label_pair in self.class_list:
        self.W[label_pair] = np.zeros(self.data_demension)
    return self.W
Example #14
Source File: test.py From cvpr2018-hnd with MIT License | 6 votes |
def count_super(p, m, counters, preds, labels, label_to_ch):
    """Accumulate sample and hit counts per class into `counters`.

    For every distinct label, the predictions with that label are compared
    against the label's entries in `label_to_ch`. A label with entries is
    treated as known: a prediction equal to any of them is a hit, and the hit
    total is added to `counters['acc'][p, m]` under each entry. A label
    without entries is treated as novel: negative predictions are hits,
    recorded under key -1. Sample totals go to `counters['data'][m]` only
    when `p == 0`.
    """
    for label in np.unique(labels):
        label_preds = preds[labels == label]
        n_samples = label_preds.shape[0]
        children = label_to_ch[label]

        if not children:
            # out -> novel
            if p == 0:
                counters['data'][m][-1] += n_samples
            counters['acc'][p, m][-1] += (label_preds < 0).sum()
            continue

        # in -> known
        hit = np.zeros_like(label_preds, dtype=bool)
        for child in children:
            if p == 0:
                counters['data'][m][child] += n_samples
            hit |= (label_preds == child)
        n_hits = hit.sum()
        for child in children:
            counters['acc'][p, m][child] += n_hits
Example #15
Source File: utils.py From pruning_yolov3 with GNU General Public License v3.0 | 6 votes |
def print_mutation(hyp, results, bucket=''):
    """Print one mutation result and record it in evolve.txt.

    The hyperparameter keys, their values and the results are printed as
    fixed-width rows. The results row followed by the values row is appended
    to evolve.txt, which is then rewritten with duplicate rows removed and
    rows ordered by descending `fitness`. When `bucket` is given, the file is
    downloaded from and uploaded back to that bucket with gsutil.
    """
    n_hyp = len(hyp)
    keys_row = '%10s' * n_hyp % tuple(hyp.keys())  # hyperparam keys
    vals_row = '%10.3g' * n_hyp % tuple(hyp.values())  # hyperparam values
    results_row = '%10.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
    print('\n%s\n%s\nEvolved fitness: %s\n' % (keys_row, vals_row, results_row))

    if bucket:
        # download evolve.txt
        os.system('gsutil cp gs://%s/evolve.txt .' % bucket)

    # append result
    with open('evolve.txt', 'a') as f:
        f.write(results_row + vals_row + '\n')

    rows = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
    best_first = np.argsort(-fitness(rows))
    np.savetxt('evolve.txt', rows[best_first], '%10.3g')  # save sort by fitness

    if bucket:
        # upload evolve.txt
        os.system('gsutil cp evolve.txt gs://%s' % bucket)
Example #16
Source File: BestMap.py From sparse-subspace-clustering-python with MIT License | 6 votes |
def BestMap(L1, L2):
    """Relabel `L2` so that its labels agree with `L1` as well as possible.

    Both inputs are flattened in column-major order and cast to float. An
    overlap matrix between the label sets is built and handed (negated) to
    `Hungarian`; the returned assignment maps each label of `L2` to a label
    of `L1`. Exits via `sys.exit` when the two inputs differ in size.
    """
    L1 = L1.flatten(order='F').astype(float)
    L2 = L2.flatten(order='F').astype(float)
    if L1.size != L2.size:
        sys.exit('size(L1) must == size(L2)')

    Label1, Label2 = np.unique(L1), np.unique(L2)
    nClass1, nClass2 = Label1.size, Label2.size
    nClass = max(nClass1, nClass2)

    # For Hungarian - Label2 are Workers, Label1 are Tasks.
    G = np.zeros((nClass, nClass), dtype=float)
    for i, worker in enumerate(Label2):
        worker_mask = (L2 == worker)
        for j, task in enumerate(Label1):
            G[i, j] = np.sum(np.logical_and(worker_mask, L1 == task))

    c = Hungarian(-G)

    newL2 = np.zeros(L2.shape)
    for i, worker in enumerate(Label2):
        newL2[L2 == worker] = Label1[c[i]]
    return newL2
Example #17
Source File: RidgeRegression.py From fuku-ml with MIT License | 6 votes |
def init_W(self, mode='normal'):
    """Create a zero weight vector for every pair of class labels.

    Requires `self.status` to be 'load_train_data' or 'train'; otherwise a
    message is printed and an empty dict is returned. On success the status
    becomes 'init' and `self.W` maps each 2-combination of the distinct
    values in `self.train_Y` to a zero vector with one entry per feature.
    `mode` is accepted but not used.
    """
    self.W = {}

    if self.status not in ('load_train_data', 'train'):
        print("Please load train data first.")
        return self.W

    self.status = 'init'
    self.data_num = len(self.train_Y)
    self.data_demension = len(self.train_X[0])

    distinct_labels = np.unique(self.train_Y)
    self.class_list = list(itertools.combinations(distinct_labels, 2))
    for label_pair in self.class_list:
        self.W[label_pair] = np.zeros(self.data_demension)
    return self.W
Example #18
Source File: EasyTL.py From transferlearning with MIT License | 5 votes |
def get_class_center(Xs, Ys, Xt, dist):
    """Compute per-class source centres and target-to-class distances.

    Parameters
    ----------
    Xs : ndarray, (n_source, n_features)
        Source samples.
    Ys : ndarray
        Source labels; flattened before being used as a row mask.
    Xt : ndarray, (n_target, n_features)
        Target samples.
    dist : str
        One of "ma", "euclidean", "sqeuc", "cosine" or "rbf".

    Returns
    -------
    source_class_center : ndarray, (n_features, n_classes)
        Column c is the mean of the source samples of the c-th distinct label.
    Dct : ndarray, (n_target, n_classes)
        Column c holds the distance from every target sample to class c.
        Both outputs are empty 1-D arrays when `Ys` has no labels.

    Raises
    ------
    ValueError
        If `dist` is not a supported name. Previously an unknown name either
        failed with an unbound-variable error or silently reused the
        distances of the previous class.
    """
    centers = []
    distances = []
    for label in np.unique(Ys):
        sel_mask = Ys == label
        X_i = Xs[sel_mask.flatten()]
        mean_i = np.mean(X_i, axis=0)

        if dist == "ma":
            Dct_c = get_ma_dist(Xt, X_i)
        elif dist == "euclidean":
            Dct_c = np.sqrt(np.nansum((mean_i - Xt) ** 2, axis=1))
        elif dist == "sqeuc":
            Dct_c = np.nansum((mean_i - Xt) ** 2, axis=1)
        elif dist == "cosine":
            Dct_c = get_cosine_dist(Xt, mean_i)
        elif dist == "rbf":
            Dct_c = np.nansum((mean_i - Xt) ** 2, axis=1)
            Dct_c = np.exp(-Dct_c / 1)
        else:
            raise ValueError("Unknown distance type: %r" % (dist,))

        centers.append(mean_i.reshape(-1, 1))
        distances.append(Dct_c.reshape(-1, 1))

    if not centers:
        return np.array([]), np.array([])
    # Stack once at the end rather than re-copying the arrays per class.
    return np.hstack(centers), np.hstack(distances)
Example #19
Source File: test.py From cvpr2018-hnd with MIT License | 5 votes |
def count_test(p, counters, preds, labels, T, hierarchical_measure=False):
    """Accumulate per-label test statistics into `counters`.

    For each distinct label, a prediction is a hit when it equals any entry
    of `T['label_hnd'][label]`. Sample totals are added to `counters['data']`
    only when `p == 0`; hit totals are added to `counters['acc'][p, label]`.
    With `hierarchical_measure`, the element-wise best HE (minimum) and
    HP/HR/HF (maximum) over the label's entries are summed into the counters
    of the same names, using the matrices stored in `T`.
    """
    label_hnd = T['label_hnd']
    if hierarchical_measure:
        HP_mat, HF_mat, dist_mat = T['HP_mat'], T['HF_mat'], T['dist_mat']

    for l in np.unique(labels.cpu().numpy()):
        row_mask = (labels == int(l)).cpu().numpy().astype(bool)
        preds_l = preds[row_mask]
        acc = np.zeros_like(preds_l, dtype=bool)

        if hierarchical_measure:
            # Start from the worst value of each measure.
            HE = MAX_DIST*np.ones_like(preds_l, dtype=int)
            HP = np.zeros_like(preds_l)
            HR = np.zeros_like(preds_l)
            HF = np.zeros_like(preds_l)

        for c in label_hnd[l]:
            acc |= (preds_l == c)
            if hierarchical_measure:
                HE = np.minimum(HE, dist_mat[preds_l, c])
                HP = np.maximum(HP, HP_mat[preds_l, c])
                # The same matrix is read transposed here.
                HR = np.maximum(HR, HP_mat[c, preds_l])
                HF = np.maximum(HF, HF_mat[preds_l, c])

        if p == 0:
            counters['data'][l] += preds_l.shape[0]
        counters['acc'][p,l] += acc.sum()
        if hierarchical_measure:
            for key, per_sample in (('HE', HE), ('HP', HP), ('HR', HR), ('HF', HF)):
                counters[key][p,l] += per_sample.sum()
Example #20
Source File: samplers.py From cvpr2018-hnd with MIT License | 5 votes |
def balanced_shuffle(labels, num_epochs=50, path=None, start_time=None):
    """Build, or load from cache, a class-balanced random sample order.

    Parameters
    ----------
    labels : torch.Tensor
        One label per sample.
    num_epochs : int
        Number of epochs to generate an order for.
    path : str or None
        Directory holding the cache file `balanced_order_{num_epochs}.h5`.
        An existing cache is loaded; otherwise a new order is generated and
        written there. With None, nothing is read or written.
    start_time : float or None
        Reference time for the elapsed-time messages. Defaults to the moment
        of the call; the previous `time.time()` default was evaluated once,
        when the function was defined.

    Returns
    -------
    torch.Tensor
        Shape (num_epochs, max_class_size * num_classes). Every row
        interleaves the classes; -1 pads the classes that have fewer samples
        than the largest one.
    """
    if start_time is None:
        start_time = time.time()

    order_path = '{path}/balanced_order_{num_epochs}.h5' \
                 .format(path=path, num_epochs=num_epochs)
    if path is not None and os.path.isfile(order_path):
        with h5py.File(order_path, 'r') as f:
            order = f['order'][:]
    else:
        evenness = 5  # batch_size | evenness*num_classes
        labels_np = labels.numpy()
        classes = np.unique(labels_np)
        num_classes = len(classes)
        loc_data_per_class = [np.argwhere(labels_np == k).flatten() for k in classes]
        max_data_per_class = max(len(loc) for loc in loc_data_per_class)

        # Split the rows into `evenness` nearly equal chunks.
        num_loc_split = (max_data_per_class // evenness) * np.ones(evenness, dtype=int)
        num_loc_split[:(max_data_per_class % evenness)] += 1
        loc_split = [0]
        loc_split.extend(np.cumsum(num_loc_split).tolist())

        order = -np.ones([num_epochs, max_data_per_class*num_classes], dtype=int)
        print_freq = min([100, (num_epochs-1) // 5 + 1])
        for epoch in range(num_epochs):
            order_e = -np.ones([max_data_per_class, num_classes], dtype=int)
            # Index by position in `classes`, not by label value, so labels
            # need not be the integers 0..num_classes-1.
            for col, loc in enumerate(loc_data_per_class):
                loc_k = np.random.permutation(loc)
                for i in range(evenness):
                    loc_i = loc_k[loc_split[i]:loc_split[i+1]]
                    order_e[i:(len(loc_i)*evenness+i):evenness, col] = loc_i
            order[epoch] = order_e.flatten()

            print_me = (epoch == 0 or epoch == num_epochs-1
                        or (epoch+1) % print_freq == 0)
            if print_me:
                print('{epoch:4d}/{num_epochs:4d} e; '.format(epoch=epoch+1, num_epochs=num_epochs), end='')
                print('generate balanced random order; {time:8.3f} s'.format(time=time.time()-start_time))

        if path is not None:
            with h5py.File(order_path, 'w') as f:
                f.create_dataset('order', data=order, compression='gzip', compression_opts=9)

    print('balanced random order; {time:8.3f} s'.format(time=time.time()-start_time))
    return torch.from_numpy(order)
Example #21
Source File: test.py From PHATE with GNU General Public License v2.0 | 5 votes |
def test_simple():
    """Smoke-test PHATE embedding, k-means clustering and the fit inputs."""
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    n_samples = tree_data.shape[0]

    phate_operator = phate.PHATE(knn=15, t=100, verbose=False)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (n_samples, 2)

    # Automatic choice of the number of clusters.
    auto_labels = phate.cluster.kmeans(phate_operator, n_clusters='auto')
    assert np.issubdtype(auto_labels.dtype, np.signedinteger)
    assert len(np.unique(auto_labels)) >= 2
    assert len(auto_labels.shape) == 1
    assert len(auto_labels) == n_samples

    # Fixed number of clusters.
    fixed_labels = phate.cluster.kmeans(phate_operator, n_clusters=3)
    assert np.issubdtype(fixed_labels.dtype, np.signedinteger)
    assert len(np.unique(fixed_labels)) == 3
    assert len(fixed_labels.shape) == 1
    assert len(fixed_labels) == n_samples

    # Fitting accepts the operator's own graph, a graphtools graph, a pygsp
    # graph and an AnnData object.
    phate_operator.fit(phate_operator.graph)
    G = graphtools.Graph(
        phate_operator.graph.kernel,
        precomputed="affinity",
        use_pygsp=True,
        verbose=False,
    )
    phate_operator.fit(G)
    G = pygsp.graphs.Graph(G.W)
    phate_operator.fit(G)
    phate_operator.fit(anndata.AnnData(tree_data))

    # Clustering a non-PHATE object must be rejected.
    expected_message = "Expected phate_op to be of type PHATE. Got 1"
    with assert_raises_message(TypeError, expected_message):
        phate.cluster.kmeans(1)
Example #22
Source File: functional.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def poisson_noise(img):
    """Apply Poisson noise to an image and return it in the input dtype.

    The image is scaled by 1/255, multiplied by the number of distinct
    intensities rounded up to a power of two, and used as the rate of a
    Poisson draw. The draw is divided by the same factor, clipped to [0, 1]
    and scaled back by 255.
    """
    original_dtype = img.dtype
    scaled = img.astype(np.float32) / 255.0

    # Number of distinct intensities, rounded up to the next power of two.
    levels = 2 ** np.ceil(np.log2(len(np.unique(scaled))))

    sampled = np.random.poisson(scaled * levels) / float(levels)
    noisy = 255 * np.clip(sampled, 0, 1)
    return noisy.astype(original_dtype)
Example #23
Source File: measures.py From discomll with Apache License 2.0 | 5 votes |
def info_gain_numeric(x, y, accuracy):
    """Search a numeric attribute for the split with the highest information gain.

    x: numpy array - numeric feature values
    y: numpy array - labels; passed to np.bincount, so non-negative integers
    accuracy: when > 0 and more than 15 class boundaries exist, boundaries
        closer than `accuracy` positions to the previous one are dropped

    Returns None when x holds a single distinct value or fewer than two
    candidate boundaries remain. Otherwise returns (gain, [threshold,
    threshold]) with the gain as a float and the threshold as np.float32.
    Entropy is computed by the helper `h`, which is defined elsewhere.
    """
    x_unique = list(np.unique(x))
    if len(x_unique) == 1:
        return None
    indices = x.argsort()  # sort numeric attribute
    x, y = x[indices], y[indices]  # save sorted features with sorted labels

    right_dist = np.bincount(y)
    # One past the largest label: prepending it to a slice of y makes the
    # bincount below long enough to be indexed with class_indices.
    dummy_class = np.array([len(right_dist)])
    class_indices = right_dist.nonzero()[0]
    right_dist = right_dist[class_indices]
    left_dist = np.zeros(len(class_indices))

    diffs = np.nonzero(y[:-1] != y[1:])[0] + 1  # different neighbor classes have value True
    if accuracy > 0:
        # Thin out the boundaries only when there are more than 15 of them.
        diffs = np.array(
            [diffs[i] for i in range(1, len(diffs)) if diffs[i] - diffs[i - 1] > accuracy],
            dtype=np.int32) if len(diffs) > 15 else diffs
    # Rows are (start, end) pairs of consecutive boundaries, starting at 0.
    intervals = np.array((np.concatenate(([0], diffs[:-1])), diffs)).T
    if len(diffs) < 2:
        return None

    max_ig, max_i, max_j = 0, 0, 0
    prior_h = h(right_dist)  # calculate prior entropy
    for i, j in intervals:
        # Move the class counts of y[i:j] from the right side to the left.
        dist = np.bincount(np.concatenate((dummy_class, y[i:j])))[class_indices]
        left_dist += dist
        right_dist -= dist
        # Fraction of all samples on each side of the candidate split.
        coef = np.true_divide((np.sum(left_dist), np.sum(right_dist)), len(y))
        ig = prior_h - np.dot(coef, [h(left_dist[left_dist.nonzero()]),
                                     h(right_dist[right_dist.nonzero()])])
        if ig > max_ig:
            max_ig, max_i, max_j = ig, i, j

    if x[max_i] == x[max_j]:
        # Both ends carry the same value: average it with a neighbouring
        # distinct value instead (the next one when it is the smallest).
        ind = x_unique.index(x[max_i])
        mean = np.float32(np.mean((x_unique[1 if ind == 0 else ind - 1], x_unique[ind])))
    else:
        mean = np.float32(np.mean((x[max_i], x[max_j])))
    return float(max_ig), [mean, mean]
Example #24
Source File: RegNet2020.py From Pytorch-Networks with MIT License | 5 votes |
def generate_regnet(w_a, w_0, w_m, d, q=8):
    """Generates per block ws from RegNet parameters.

    The continuous widths `w_0 + w_a * j` for j in 0..d-1 are snapped to
    powers of `w_m` times `w_0`, then rounded to multiples of `q`.

    Returns `(ws, num_stages, max_stage, ws_cont)`: the quantized widths as a
    list of ints, the number of distinct widths, the largest exponent plus
    one, and the continuous widths as a list.
    """
    assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0

    continuous_widths = np.arange(d) * w_a + w_0
    # Exponent of w_m that brings w_0 closest to each continuous width.
    exponents = np.round(np.log(continuous_widths / w_0) / np.log(w_m))
    snapped = w_0 * np.power(w_m, exponents)
    quantized = np.round(snapped / q) * q

    num_stages = len(np.unique(quantized))
    max_stage = exponents.max() + 1
    return (quantized.astype(int).tolist(), num_stages, max_stage,
            continuous_widths.tolist())
Example #25
Source File: measures.py From discomll with Apache License 2.0 | 5 votes |
def nominal_splits(x, y, x_vals, y_dist, separate_max):
    """
    Order the values of a nominal feature for a heuristic binary split.

    The heuristic is described in (1) for binary classes. It is extended to
    several classes by comparing one reference label with all the others: the
    label with the fewest samples, or the one with the most samples when
    separate_max is true.

    x: numpy array - nominal feature
    y: numpy array - label
    x_vals: numpy array - unique nominal values of x
    y_dist: dictionary - distribution of labels
    separate_max: bool - use the most frequent label as the reference

    Returns the label distribution at each nominal value (array of Counter
    objects) and the nominal values themselves (list), both ordered by
    decreasing estimated probability of the reference label.

    Reference:
    (1) Classification and Regression Trees by Breiman, Friedman, Olshen, and
    Stone, pages 101-102.
    """
    # Reference label: least frequent by default, most frequent on request.
    pick = max if separate_max else min
    y_val = pick(y_dist, key=y_dist.get)
    prior = y_dist[y_val] / float(len(y))  # prior distribution of selected label

    values, dist, splits = [], [], []
    for x_val in x_vals:  # for every unique nominal value
        label_counts = Counter(y[x == x_val])  # distribution of labels at this value
        dist.append(label_counts)
        splits.append(x_val)
        suma = sum([prior * label_counts[y_key] for y_key in y_dist.keys()])
        values.append(prior * label_counts[y_val] / float(suma))  # estimate probability

    # distributions and splits are sorted according to probabilities
    order = np.array(values).argsort()[::-1]
    return np.array(dist)[order], np.array(splits)[order].tolist()
Example #26
Source File: k_medoids.py From discomll with Apache License 2.0 | 5 votes |
def fit(sim_mat, D_len, cidx):
    """
    Run three rounds of medoid reassignment and keep the lowest-energy round.

    In each round every instance found in sim_mat is assigned to the center
    in cidx with the smallest stored value (missing entries count as 0). For
    each resulting cluster, the member with the smallest summed value to the
    other members becomes the new center. The algorithm does not try to
    retain k clusters.

    sim_mat: mapping - instance id -> mapping of other ids to values
    D_len: int - instance ids are taken from range(D_len)
    cidx: list - initial center ids

    Returns the cluster assignment and the center list of the best round.
    """
    min_energy = np.inf
    for j in range(3):
        # select indices in each sample that maximizes its dimension
        # NOTE(review): inds has one entry per idy present in sim_mat, so its
        # positions equal instance ids only when every id in range(D_len) is
        # present -- confirm against the caller.
        inds = [np.argmin([sim_mat[idy].get(idx, 0) for idx in cidx]) for idy in range(D_len) if idy in sim_mat]
        cidx = []
        energy = 0  # current energy
        for i in np.unique(inds):
            indsi = np.where(inds == i)[0]  # find indices for every cluster
            # Starting from 0, only a strictly negative sum replaces the
            # default choice of the cluster's first member.
            minind, min_value = 0, 0
            for index, idy in enumerate(indsi):
                if idy in sim_mat:
                    # value = sum([sim_mat[idy].get(idx,0) for idx in indsi])
                    value = 0
                    for idx in indsi:
                        value += sim_mat[idy].get(idx, 0)
                    if value < min_value:
                        minind, min_value = index, value
            energy += min_value
            cidx.append(indsi[minind])  # new centers

        # Remember the round with the lowest total energy.
        if energy < min_energy:
            min_energy, inds_min, cidx_min = energy, inds, cidx
    return inds_min, cidx_min  # cluster for every instance, medoids indices
Example #27
Source File: test_suba.py From libTLDA with MIT License | 5 votes |
def test_predict_semi():
    """Check that semi-supervised predictions only use labels seen in training."""
    # Source data: five samples of class 0 followed by five of class 1.
    X = rnd.randn(10, 2)
    y = np.hstack([np.zeros((5,)), np.ones((5,))])

    # Shifted target data and the target labels given to the classifier.
    Z = rnd.randn(10, 2) + 1
    u = np.array([[0, 0], [9, 1]])

    clf = SemiSubspaceAlignedClassifier()
    clf.fit(X, y, Z, u)
    u_pred = clf.predict(Z)

    # No predicted label may fall outside the training labels.
    known_labels = np.unique(y)
    unexpected = np.setdiff1d(np.unique(u_pred), known_labels)
    assert len(unexpected) == 0
Example #28
Source File: test_random.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def test_unique_zipfian_generator():
    """Check uniqueness and trial counts of the unique zipfian sampler (CPU only)."""
    ctx = mx.context.current_context()
    if ctx.device_type != 'cpu':
        return

    num_sampled = 8192
    range_max = 793472
    batch_size = 4
    sampler = mx.nd._internal._sample_unique_zipfian
    classes, num_trials = sampler(range_max, shape=(batch_size, num_sampled))

    for row in range(batch_size):
        num_trial = num_trials[row].asscalar()
        # test uniqueness
        assert np.unique(classes[row].asnumpy()).size == num_sampled
        # test num trials. reference count obtained from pytorch implementation
        assert num_trial > 14500
        assert num_trial < 17000
Example #29
Source File: metric.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def update_binary_stats(self, label, pred):
    """
    Add one batch to the running binary confusion counts.

    The predicted class is the argmax of `pred` along axis 1, and class 1 is
    treated as the positive class.

    Parameters
    ----------
    label : `NDArray`
        The labels of the data.

    pred : `NDArray`
        Predicted values.

    Raises
    ------
    ValueError
        If `label` holds more than two distinct values.
    """
    pred = pred.asnumpy()
    label = label.asnumpy().astype('int32')
    pred_label = numpy.argmax(pred, axis=1)

    check_label_shapes(label, pred)
    n_distinct = len(numpy.unique(label))
    if n_distinct > 2:
        raise ValueError("%s currently only supports binary classification."
                         % self.__class__.__name__)

    pred_true = (pred_label == 1)
    label_true = (label == 1)
    # `1 - mask` turns a boolean mask into its integer complement.
    pred_false = 1 - pred_true
    label_false = 1 - label_true

    self.true_positives += (pred_true * label_true).sum()
    self.false_positives += (pred_true * label_false).sum()
    self.false_negatives += (pred_false * label_true).sum()
    self.true_negatives += (pred_false * label_false).sum()
Example #30
Source File: test_iw.py From libTLDA with MIT License | 5 votes |
def test_predict():
    """Check that predictions only use labels seen in training."""
    # Source data: five samples of class -1 followed by five of class +1.
    X = rnd.randn(10, 2)
    y = np.hstack([-np.ones((5,)), np.ones((5,))])

    # Shifted target data.
    Z = rnd.randn(10, 2) + 1

    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)

    # No predicted label may fall outside the training labels.
    known_labels = np.unique(y)
    unexpected = np.setdiff1d(np.unique(u_pred), known_labels)
    assert len(unexpected) == 0