Python numpy.unique() Examples

The following are 30 code examples of numpy.unique(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
Example #1
Source File: dataloader_m.py    From models with MIT License 7 votes vote down vote up
def prepro_pos_table(pos_tables):
    """Merge position tables into one table of unique, sorted positions.

    `pos_tables` is either a single table or a list of tables, each with
    `chromo` and `pos` columns. Tables are folded in one at a time; after
    every step the running table holds the unique `pos` values per `chromo`,
    ordered by `chromo` and then `pos`. Returns None for an empty list.
    """
    tables = pos_tables if isinstance(pos_tables, list) else [pos_tables]

    merged = None
    for table in tables:
        merged = table if merged is None else pd.concat([merged, table])
        # Collapse duplicate positions within every chromosome.
        per_chromo = merged.groupby('chromo').apply(
            lambda group: pd.DataFrame({'pos': np.unique(group['pos'])}))
        per_chromo = per_chromo.reset_index()
        merged = per_chromo[['chromo', 'pos']].sort_values(['chromo', 'pos'])
    return merged
Example #2
Source File: MEDA.py    From transferlearning with MIT License 6 votes vote down vote up
def estimate_mu(self, _X1, _Y1, _X2, _Y2):
        """Estimate the balance factor mu from proxy A-distances.

        The marginal distance is measured between `_X1` and `_X2`; the
        conditional distance is the mean of the per-class distances, where
        class `i` runs over 1..C and C is the number of distinct values in
        `_Y1` (so labels are assumed to be 1-based and contiguous).
        The ratio conditional / (conditional + marginal) is capped at 1 and
        set to 0 when it falls below 1e-3.
        """
        epsilon = 1e-3
        marginal_dist = proxy_a_distance(_X1, _X2)
        n_classes = len(np.unique(_Y1))

        per_class_dist = []
        for label in range(1, n_classes + 1):
            rows_1 = np.where(_Y1 == label)[0]
            rows_2 = np.where(_Y2 == label)[0]
            per_class_dist.append(
                proxy_a_distance(_X1[rows_1, :], _X2[rows_2, :]))

        conditional_dist = sum(per_class_dist) / n_classes
        mu = conditional_dist / (conditional_dist + marginal_dist)
        if mu > 1:
            mu = 1
        if mu < epsilon:
            mu = 0
        return mu
Example #3
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def fit(self, X, y):
        """Fit the wrapped tree model and rebuild the per-node counters.

        Returns `self` untouched when `X` has no rows. When `y` holds at most
        one distinct value, only `self.update_aux(y)` is called. Otherwise the
        model is refit with a freshly drawn random seed, and `self.pos` /
        `self.neg` are rebuilt from the node each row of `X` falls into, then
        offset by `self.beta_prior[0]` and `self.beta_prior[1]`.
        """
        if X.shape[0] == 0:
            return self
        if np.unique(y).shape[0] <= 1:
            self.update_aux(y)
            return self

        int32_max = np.iinfo(np.int32).max
        self.model.set_params(random_state=self.random_state.integers(int32_max))
        self.model.fit(X, y)

        node_total = self.model.tree_.node_count
        self.pos = np.zeros(node_total, dtype=ctypes.c_long)
        self.neg = np.zeros(node_total, dtype=ctypes.c_long)
        node_of_row = self.model.apply(X).astype(ctypes.c_long)
        _create_node_counters(self.pos, self.neg, node_of_row,
                              y.astype(ctypes.c_double))

        # Counters become floating point once the prior is added.
        self.pos = self.pos.astype(ctypes.c_double) + self.beta_prior[0]
        self.neg = self.neg.astype(ctypes.c_double) + self.beta_prior[1]

        self.is_fitted = True
        return self
Example #4
Source File: test_util.py    From libTLDA with MIT License 6 votes vote down vote up
def test_one_hot():
    """Verify that one_hot produces a 0/1 matrix consistent with its labels."""
    # Thirty labels: ten each of classes 0, 1 and 2
    y = np.repeat([0., 1., 2.], 10)

    # Encode the vector as a label matrix
    Y, labels = one_hot(y)

    # The matrix may contain nothing but 0's and 1's
    stray_values = np.setdiff1d(np.unique(Y), [0, 1])
    assert len(stray_values) == 0

    # Returned labels must be the distinct values of y
    assert np.all(labels == np.unique(y))

    # One row per sample, one column per label
    n_rows, n_cols = Y.shape[0], Y.shape[1]
    assert n_rows == y.shape[0]
    assert n_cols == len(labels)
Example #5
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def fit(self, X, y, *args, **kwargs):
        """Fit the wrapped classifier and rebuild the matrix ``self.Sigma``.

        Nothing is done (and ``self`` is returned as-is) when `X` has no rows
        or `y` holds at most one distinct value. Otherwise ``self.model`` is
        fit, the per-row variance ``p * (1 - p)`` of its positive-class
        probability is computed, and ``self.Sigma`` is rebuilt from `X` and
        those variances before being inverted in place.

        Extra positional and keyword arguments are accepted and ignored.

        Returns
        -------
        self
            Returned on every path, so the call can be chained. Previously
            the successful path returned None while the early exits
            returned ``self``.
        """
        if X.shape[0] == 0 or np.unique(y).shape[0] <= 1:
            return self

        self.model.fit(X, y)
        var = self.model.predict_proba(X)[:, 1]
        var = var * (1 - var)

        # Sigma gets one extra row/column when an intercept is fitted.
        n = X.shape[1]
        self.Sigma = np.zeros((n+self.fit_intercept, n+self.fit_intercept), dtype=ctypes.c_double)
        X, Xcsr = self._process_X(X)
        _wrapper_double.update_matrices_noinv(
            X,
            np.empty(0, dtype=ctypes.c_double),
            var,
            self.Sigma,
            np.empty(0, dtype=ctypes.c_double),
            Xcsr = Xcsr,
            add_bias=self.fit_intercept,
            overwrite=1
        )
        _matrix_inv_symm(self.Sigma, self.lambda_)
        self.is_fitted = True
        return self
Example #6
Source File: metrics.py    From DDPAE-video-prediction with MIT License 6 votes vote down vote up
def find_match(self, pred, gt):
    '''
    Assign each predicted component to its nearest ground-truth ball.

    pred and gt are reshaped to (batch, frames, components, 2). The squared
    distance between every component/ball pair is summed over frames, and
    each component takes the ball with the smallest total. A batch entry is
    flagged as ambiguous when two components pick the same ball.

    Returns (indices, ambiguous): indices has shape (batch, components);
    ambiguous is an int8 vector of 0/1 flags with one entry per batch item.
    '''
    batch_size, n_frames_input, n_components, _ = pred.shape
    pred_pairs = pred.reshape(batch_size, n_frames_input, n_components, 1, 2)
    gt_pairs = gt.reshape(batch_size, n_frames_input, 1, n_components, 2)
    sq_dist = np.sum(np.sum((pred_pairs - gt_pairs) ** 2, axis=-1), axis=1)

    # Direct indices
    indices = np.argmin(sq_dist, axis=2)

    ambiguous = np.zeros(batch_size, dtype=np.int8)
    for b in range(batch_size):
        counts = np.unique(indices[b], return_counts=True)[1]
        if np.any(counts != 1):
            ambiguous[b] = 1
    return indices, ambiguous
Example #7
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _partial_fit_single(self, choice, X, a, r):
        """Update the classifier of arm `choice` with a new batch of data.

        `self._filter_arm_data` supplies the targets for this arm together
        with an indexer selecting the matching rows of `X`. When a buffer is
        configured, the batch actually used for fitting is whatever the
        buffer hands back, and the buffer also decides whether a full refit
        is due. Returns None; all effects are on `self`.
        """
        yclass, this_choice = self._filter_arm_data(X, a, r, choice)
        if self.smooth is not None:
            # Count the observations seen for this arm.
            self.counters[0, choice] += yclass.shape[0]

        xclass = X[this_choice, :]
        do_full_refit = False
        if self.buffer is not None:
            # NOTE(review): do_full_refit() is queried before get_batch();
            # the buffer is presumably stateful, so keep this order.
            do_full_refit = self.buffer[choice].do_full_refit()
            xclass, yclass = self.buffer[choice].get_batch(xclass, yclass)

        if (xclass.shape[0] > 0) or self.force_fit:
            # A full refit needs at least two distinct target values;
            # otherwise fall back to an incremental update.
            if (do_full_refit) and (np.unique(yclass).shape[0] >= 2):
                self.algos[choice].fit(xclass, yclass)
            else:
                self.algos[choice].partial_fit(xclass, yclass, classes = [0, 1])

        # Update the beta counters if needed
        if (self.force_counters):
            self._update_beta_counters(yclass, choice) 
Example #8
Source File: online.py    From contextualbandits with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _add_choices(self, nchoices):
        if isinstance(nchoices, int):
            self.nchoices = nchoices
            self.choice_names = None
        elif isinstance(nchoices, list) or nchoices.__class__.__name__ == "Series" or nchoices.__class__.__name__ == "DataFrame":
            self.choice_names = np.array(nchoices).reshape(-1)
            self.nchoices = self.choice_names.shape[0]
            if np.unique(self.choice_names).shape[0] != self.choice_names.shape[0]:
                raise ValueError("Arm/choice names contain duplicates.")
        elif isinstance(nchoices, np.ndarray):
            self.choice_names = nchoices.reshape(-1)
            self.nchoices = self.choice_names.shape[0]
            if np.unique(self.choice_names).shape[0] != self.choice_names.shape[0]:
                raise ValueError("Arm/choice names contain duplicates.")
        else:
            raise ValueError("'nchoices' must be an integer or list with named arms.") 
Example #9
Source File: KernelRidgeRegression.py    From fuku-ml with MIT License 6 votes vote down vote up
def init_W(self, mode='normal'):
        """Reset `self.W` to one zero weight vector per pair of classes.

        Training data must already be loaded (status 'load_train_data' or
        'train'); otherwise a message is printed and the empty dict is
        returned. On success the status becomes 'init', the sample count and
        feature count are recorded, and `self.W` maps every 2-combination of
        the distinct values in `self.train_Y` to a zero vector with one entry
        per feature. `mode` is accepted but not used here.
        """
        self.W = {}

        if self.status not in ('load_train_data', 'train'):
            print("Please load train data first.")
            return self.W

        self.status = 'init'
        self.data_num = len(self.train_Y)
        self.data_demension = len(self.train_X[0])

        distinct_labels = np.unique(self.train_Y)
        self.class_list = list(itertools.combinations(distinct_labels, 2))
        self.W.update(
            (pair, np.zeros(self.data_demension)) for pair in self.class_list)

        return self.W
Example #10
Source File: LinearRegression.py    From fuku-ml with MIT License 6 votes vote down vote up
def init_W(self, mode='normal'):
        """Reset `self.W` to one zero weight vector per pair of classes.

        Training data must already be loaded (status 'load_train_data' or
        'train'); otherwise a message is printed and the empty dict is
        returned. On success the status becomes 'init', the sample count and
        feature count are recorded, and `self.W` maps every 2-combination of
        the distinct values in `self.train_Y` to a zero vector with one entry
        per feature. `mode` is accepted but not used here.
        """
        self.W = {}

        if self.status not in ('load_train_data', 'train'):
            print("Please load train data first.")
            return self.W

        self.status = 'init'
        self.data_num = len(self.train_Y)
        self.data_demension = len(self.train_X[0])

        distinct_labels = np.unique(self.train_Y)
        self.class_list = list(itertools.combinations(distinct_labels, 2))
        self.W.update(
            (pair, np.zeros(self.data_demension)) for pair in self.class_list)

        return self.W
Example #11
Source File: dataset.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License 6 votes vote down vote up
def visualize_sampling(self, permutations):
        """Show a heatmap of how often each city is picked at each step.

        `permutations` is a batch of tours. Row t of the heatmap counts, over
        the whole batch, how many tours visit each city at step t.
        """
        n_steps = len(permutations[0])
        heatmap = np.zeros([n_steps, n_steps])  # counts start at 0

        # Transposing groups the batch by step: one row per step t.
        for step, chosen_cities in enumerate(np.transpose(permutations)):
            cities, occurrences = np.unique(chosen_cities, return_counts=True, axis=0)
            for city, n_times in zip(cities, occurrences):
                heatmap[step][city] += n_times

        # Draw the heatmap
        fig = plt.figure()
        rcParams.update({'font.size': 22})
        axes = fig.add_subplot(1, 1, 1)
        axes.set_aspect('equal')
        plt.imshow(heatmap, interpolation='nearest', cmap='gray')
        plt.colorbar()
        plt.title('Sampled permutations')
        plt.ylabel('Time t')
        plt.xlabel('City i')
        plt.show()
Example #12
Source File: decision_tree.py    From discomll with Apache License 2.0 6 votes vote down vote up
def rand_indices(x, rand_attr):
    """
    Randomly select features without replacement, for use with random forest.

    Every selected feature must have more than one distinct value. Whenever
    a sampled feature turns out to be constant, it is dropped from the pool
    and a fresh sample is drawn.

    x: numpy array - dataset (rows are samples, columns are features)
    rand_attr: int - number of features to select

    Returns the list of selected column indices, or -1 when too few
    non-constant features remain to draw `rand_attr` of them.
    """
    # A real list is required: under Python 3 a range object has no remove().
    candidates = list(range(len(x[0])))

    while True:
        # randomly selected features without replacement
        rand_list = random.sample(candidates, rand_attr)
        constant = next(
            (i for i in rand_list if len(np.unique(x[:, i])) == 1), None)
        if constant is None:
            return rand_list

        candidates.remove(constant)
        if len(candidates) == rand_attr - 1:
            return -1  # not enough features with more than one distinct value
Example #13
Source File: SupportVectorMachine.py    From fuku-ml with MIT License 6 votes vote down vote up
def init_W(self, mode='normal'):
        """Reset `self.W` to one zero weight vector per pair of classes.

        Training data must already be loaded (status 'load_train_data' or
        'train'); otherwise a message is printed and the empty dict is
        returned. On success the status becomes 'init', the sample count and
        feature count are recorded, and `self.W` maps every 2-combination of
        the distinct values in `self.train_Y` to a zero vector with one entry
        per feature. `mode` is accepted but not used here.
        """
        self.W = {}

        if self.status not in ('load_train_data', 'train'):
            print("Please load train data first.")
            return self.W

        self.status = 'init'
        self.data_num = len(self.train_Y)
        self.data_demension = len(self.train_X[0])

        distinct_labels = np.unique(self.train_Y)
        self.class_list = list(itertools.combinations(distinct_labels, 2))
        self.W.update(
            (pair, np.zeros(self.data_demension)) for pair in self.class_list)

        return self.W
Example #14
Source File: test.py    From cvpr2018-hnd with MIT License 6 votes vote down vote up
def count_super(p, m, counters, preds, labels, label_to_ch):
    """Accumulate sample and hit counts per label into `counters`.

    For every distinct label, `label_to_ch[label]` lists the classes that
    count as correct for it. A non-empty list means the label is known: a
    prediction is a hit if it equals any listed class, and the hit total is
    added to `counters['acc'][p, m]` at each listed class. An empty list
    means the label is novel: negative predictions are hits, stored at
    index -1. Sample counts go to `counters['data'][m]` only when p == 0.
    """
    first_pass = (p == 0)

    for label in np.unique(labels):
        preds_l = preds[labels == label]
        n_samples = preds_l.shape[0]
        children = label_to_ch[label]

        # out -> novel
        if not children:
            if first_pass:
                counters['data'][m][-1] += n_samples
            counters['acc'][p, m][-1] += (preds_l < 0).sum()
            continue

        # in -> known
        hits = np.zeros_like(preds_l, dtype=bool)
        for child in children:
            if first_pass:
                counters['data'][m][child] += n_samples
            hits |= (preds_l == child)

        n_hits = hits.sum()
        for child in children:
            counters['acc'][p, m][child] += n_hits
Example #15
Source File: utils.py    From pruning_yolov3 with GNU General Public License v3.0 6 votes vote down vote up
def print_mutation(hyp, results, bucket=''):
    """Print one evolution result and merge it into evolve.txt.

    hyp: mapping of hyperparameter names to values.
    results: tuple of result numbers (the %-formatting below requires a tuple).
    bucket: optional storage bucket name; when non-empty, evolve.txt is
        fetched from it before the update and copied back afterwards.

    The new row (results followed by hyperparameter values) is appended to
    evolve.txt, then the file is rewritten with duplicate rows removed and
    the rows ordered by descending `fitness`, which is defined outside this
    block.
    """
    # Print mutation results to evolve.txt (for use with train.py --evolve)
    a = '%10s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
    b = '%10.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
    c = '%10.3g' * len(results) % results  # results (P, R, mAP, F1, test_loss)
    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))

    # NOTE(review): `bucket` is interpolated into a shell command string;
    # confirm it never comes from untrusted input.
    if bucket:
        os.system('gsutil cp gs://%s/evolve.txt .' % bucket)  # download evolve.txt

    with open('evolve.txt', 'a') as f:  # append result
        f.write(c + b + '\n')
    # ndmin=2 keeps a single-row file two-dimensional so axis=0 works.
    x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
    np.savetxt('evolve.txt', x[np.argsort(-fitness(x))], '%10.3g')  # save sort by fitness

    if bucket:
        os.system('gsutil cp evolve.txt gs://%s' % bucket)  # upload evolve.txt 
Example #16
Source File: BestMap.py    From sparse-subspace-clustering-python with MIT License 6 votes vote down vote up
def BestMap(L1, L2):
    """Relabel L2 so that its labels agree with L1 as well as possible.

    Both inputs are flattened in column-major order and cast to float. A
    square overlap matrix counts, for every (L2 label, L1 label) pair, the
    positions carrying both; `Hungarian` is run on its negation and the
    resulting assignment maps each L2 label to an L1 label.

    Exits via sys.exit when the two inputs differ in size.
    Returns a float array with the same shape as the flattened L2.
    """
    L1 = L1.flatten(order='F').astype(float)
    L2 = L2.flatten(order='F').astype(float)
    if L1.size != L2.size:
        sys.exit('size(L1) must == size(L2)')

    Label1, Label2 = np.unique(L1), np.unique(L2)
    nClass1, nClass2 = Label1.size, Label2.size
    nClass = max(nClass1, nClass2)

    # For Hungarian - Label2 are Workers, Label1 are Tasks.
    G = np.zeros([nClass, nClass]).astype(float)
    for worker in range(nClass2):
        is_worker = (L2 == Label2[worker])
        for task in range(nClass1):
            G[worker, task] = np.sum(np.logical_and(is_worker, L1 == Label1[task]))

    assignment = Hungarian(-G)
    newL2 = np.zeros(L2.shape)
    for worker in range(nClass2):
        newL2[L2 == Label2[worker]] = Label1[assignment[worker]]
    return newL2
Example #17
Source File: RidgeRegression.py    From fuku-ml with MIT License 6 votes vote down vote up
def init_W(self, mode='normal'):
        """Reset `self.W` to one zero weight vector per pair of classes.

        Training data must already be loaded (status 'load_train_data' or
        'train'); otherwise a message is printed and the empty dict is
        returned. On success the status becomes 'init', the sample count and
        feature count are recorded, and `self.W` maps every 2-combination of
        the distinct values in `self.train_Y` to a zero vector with one entry
        per feature. `mode` is accepted but not used here.
        """
        self.W = {}

        if self.status not in ('load_train_data', 'train'):
            print("Please load train data first.")
            return self.W

        self.status = 'init'
        self.data_num = len(self.train_Y)
        self.data_demension = len(self.train_X[0])

        distinct_labels = np.unique(self.train_Y)
        self.class_list = list(itertools.combinations(distinct_labels, 2))
        self.W.update(
            (pair, np.zeros(self.data_demension)) for pair in self.class_list)

        return self.W
Example #18
Source File: EasyTL.py    From transferlearning with MIT License 5 votes vote down vote up
def get_class_center(Xs,Ys,Xt,dist):
    """Compute per-class source centers and their distances to target rows.

    Parameters
    ----------
    Xs : array, one row per source sample.
    Ys : array of source labels; flattened before it is used as a row mask.
    Xt : array, one row per target sample.
    dist : str
        One of "ma", "euclidean", "sqeuc", "cosine" or "rbf". The "ma" and
        "cosine" variants delegate to helpers defined outside this block.

    Returns
    -------
    (source_class_center, Dct)
        `source_class_center` has one column per distinct label holding the
        mean of that class's rows; `Dct` has one column per label holding
        the distance of every target row to that class. Both are empty
        arrays when `Ys` holds no labels.

    Raises
    ------
    ValueError
        If `dist` is not a supported name. Previously this surfaced as an
        UnboundLocalError.
    """
    center_columns = []
    distance_columns = []
    for label in np.unique(Ys):
        sel_mask = Ys == label
        X_i = Xs[sel_mask.flatten()]
        mean_i = np.mean(X_i, axis=0)
        center_columns.append(mean_i.reshape(-1, 1))

        if dist == "ma":
            Dct_c = get_ma_dist(Xt, X_i)
        elif dist == "euclidean":
            Dct_c = np.sqrt(np.nansum((mean_i - Xt)**2, axis=1))
        elif dist == "sqeuc":
            Dct_c = np.nansum((mean_i - Xt)**2, axis=1)
        elif dist == "cosine":
            Dct_c = get_cosine_dist(Xt, mean_i)
        elif dist == "rbf":
            # The kernel bandwidth is fixed at 1.
            Dct_c = np.nansum((mean_i - Xt)**2, axis=1)
            Dct_c = np.exp(- Dct_c / 1)
        else:
            raise ValueError("Unsupported distance: %r" % (dist,))
        distance_columns.append(Dct_c.reshape(-1, 1))

    # Stack once at the end instead of growing the arrays column by column.
    source_class_center = np.hstack(center_columns) if center_columns else np.array([])
    Dct = np.hstack(distance_columns) if distance_columns else np.array([])
    return source_class_center, Dct
Example #19
Source File: test.py    From cvpr2018-hnd with MIT License 5 votes vote down vote up
def count_test(p, counters, preds, labels, T, hierarchical_measure=False):
    """Accumulate per-label test statistics into `counters`.

    For every distinct label l, the predictions of the samples carrying that
    label are compared against each class listed in `T['label_hnd'][l]`; a
    prediction matching any of them is a hit, and the hit total is added to
    `counters['acc'][p, l]`. Sample counts are added to `counters['data'][l]`
    only when p == 0.

    With `hierarchical_measure` set, the best value over the listed classes
    is also accumulated for 'HE' (minimum of `T['dist_mat']`, starting from
    the module-level MAX_DIST) and for 'HP', 'HR' and 'HF' (maxima read from
    `T['HP_mat']` and `T['HF_mat']`).

    `labels` must offer `.cpu().numpy()`; `preds` is indexed with a boolean
    numpy mask.
    """
    label_hnd = T['label_hnd']

    if hierarchical_measure:
        HP_mat, HF_mat, dist_mat = T['HP_mat'], T['HF_mat'], T['dist_mat']

    for l in np.unique(labels.cpu().numpy()):
        mask = (labels == int(l)).cpu().numpy().astype(bool)
        preds_l = preds[mask]
        targets = label_hnd[l]

        hits = np.zeros_like(preds_l, dtype=bool)
        for c in targets:
            hits |= (preds_l == c)

        if p == 0:
            counters['data'][l] += preds_l.shape[0]
        counters['acc'][p,l] += hits.sum()

        if not hierarchical_measure:
            continue

        HE = MAX_DIST*np.ones_like(preds_l, dtype=int)
        HP = np.zeros_like(preds_l)
        HR = np.zeros_like(preds_l)
        HF = np.zeros_like(preds_l)
        for c in targets:
            HE = np.minimum(HE, dist_mat[preds_l, c])
            HP = np.maximum(HP, HP_mat[preds_l, c])
            # HR reads HP_mat with the index order swapped.
            HR = np.maximum(HR, HP_mat[c, preds_l])
            HF = np.maximum(HF, HF_mat[preds_l, c])

        counters['HE'][p,l] += HE.sum()
        counters['HP'][p,l] += HP.sum()
        counters['HR'][p,l] += HR.sum()
        counters['HF'][p,l] += HF.sum()
Example #20
Source File: samplers.py    From cvpr2018-hnd with MIT License 5 votes vote down vote up
def balanced_shuffle(labels, num_epochs=50, path=None, start_time=None):
    """Build (or load) a class-balanced random sample order for each epoch.

    Parameters
    ----------
    labels : torch tensor of class labels, one per sample.
    num_epochs : int
        Number of epochs to generate an order for.
    path : str or None
        When given, the order is loaded from
        '<path>/balanced_order_<num_epochs>.h5' if that file exists, and
        written there after being generated otherwise.
    start_time : float or None
        Reference time for the elapsed-time printouts. Defaults to the time
        of the call. (It used to default to the time the module was
        imported, because the default expression was evaluated only once.)

    Returns
    -------
    torch tensor of shape (num_epochs, max_class_size * num_classes).
    Viewed as (max_class_size, num_classes), each column holds a random
    permutation of one class's sample indices, padded with -1 where the
    class has fewer samples than the largest class.

    Columns are assigned by the position of a class among the sorted
    distinct labels, so labels need not be 0..K-1.
    """
    if start_time is None:
        start_time = time.time()

    order_path = '{path}/balanced_order_{num_epochs}.h5' \
                       .format(path=path, num_epochs=num_epochs)
    if path is not None and os.path.isfile(order_path):
        with h5py.File(order_path, 'r') as f:
            order = f['order'][:]
    else:
        evenness = 5 # batch_size | evenness*num_classes
        labels_np = labels.numpy()  # converted once and reused below
        classes = np.unique(labels_np)
        num_classes = len(classes)
        loc_data_per_class = [np.argwhere(labels_np == k).flatten() for k in classes]
        max_data_per_class = max(len(loc) for loc in loc_data_per_class)

        # Split the rows into `evenness` nearly equal groups.
        num_loc_split = (max_data_per_class // evenness) * np.ones(evenness, dtype=int)
        num_loc_split[:(max_data_per_class % evenness)] += 1
        loc_split = [0]
        loc_split.extend(np.cumsum(num_loc_split).tolist())

        print_freq = min([100, (num_epochs-1) // 5 + 1])
        order = -np.ones([num_epochs, max_data_per_class*num_classes], dtype=int)
        for epoch in range(num_epochs):
            order_e = -np.ones([max_data_per_class, num_classes], dtype=int)
            # Index by column position, not by label value.
            for col, loc in enumerate(loc_data_per_class):
                loc_k = np.random.permutation(loc)
                for i in range(evenness):
                    loc_i = loc_k[loc_split[i]:loc_split[i+1]]
                    # Group i fills rows i, i+evenness, i+2*evenness, ...
                    order_e[i:(len(loc_i)*evenness+i):evenness, col] = loc_i
            order[epoch] = order_e.flatten()
            print_me = (epoch == 0 or epoch == num_epochs-1 or (epoch+1) % print_freq == 0)
            if print_me:
                print('{epoch:4d}/{num_epochs:4d} e; '.format(epoch=epoch+1, num_epochs=num_epochs), end='')
                print('generate balanced random order; {time:8.3f} s'.format(time=time.time()-start_time))

        if path is not None:
            with h5py.File(order_path, 'w') as f:
                f.create_dataset('order', data=order, compression='gzip', compression_opts=9)

    print('balanced random order; {time:8.3f} s'.format(time=time.time()-start_time))
    return torch.from_numpy(order)
Example #21
Source File: test.py    From PHATE with GNU General Public License v2.0 5 votes vote down vote up
def test_simple():
    """Smoke-test PHATE embedding, k-means clustering and graph refitting."""
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    n_samples = tree_data.shape[0]
    phate_operator = phate.PHATE(knn=15, t=100, verbose=False)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (n_samples, 2)

    # Same checks for automatic and fixed cluster counts; only the expected
    # number of distinct labels differs.
    cluster_cases = (
        ('auto', lambda n_found: n_found >= 2),
        (3, lambda n_found: n_found == 3),
    )
    for n_clusters, count_ok in cluster_cases:
        clusters = phate.cluster.kmeans(phate_operator, n_clusters=n_clusters)
        assert np.issubdtype(clusters.dtype, np.signedinteger)
        assert count_ok(len(np.unique(clusters)))
        assert len(clusters.shape) == 1
        assert len(clusters) == n_samples

    # Refit from several graph and data representations.
    phate_operator.fit(phate_operator.graph)
    affinity_graph = graphtools.Graph(
        phate_operator.graph.kernel,
        precomputed="affinity",
        use_pygsp=True,
        verbose=False,
    )
    phate_operator.fit(affinity_graph)
    phate_operator.fit(pygsp.graphs.Graph(affinity_graph.W))
    phate_operator.fit(anndata.AnnData(tree_data))

    with assert_raises_message(TypeError, "Expected phate_op to be of type PHATE. Got 1"):
        phate.cluster.kmeans(1)
Example #22
Source File: functional.py    From torch-toolbox with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def poisson_noise(img):
    """Return `img` with Poisson noise applied, in the input's dtype.

    The image is scaled by 1/255, the number of distinct values is rounded
    up to a power of two, and each pixel is resampled from a Poisson
    distribution scaled by that factor. The result is clipped to [0, 1] and
    scaled back by 255.
    """
    original_dtype = img.dtype
    scaled = img.astype(np.float32) / 255.0
    n_levels = 2 ** np.ceil(np.log2(len(np.unique(scaled))))
    resampled = np.random.poisson(scaled.astype(np.float32) * n_levels) / float(n_levels)
    noisy = 255 * np.clip(resampled, 0, 1)
    return noisy.astype(original_dtype)
Example #23
Source File: measures.py    From discomll with Apache License 2.0 5 votes vote down vote up
def info_gain_numeric(x, y, accuracy):
    """Search for the best binary split of a numeric feature by information gain.

    x: numpy array - numeric feature values
    y: numpy array - labels; passed to np.bincount, so presumably
        non-negative integers (TODO confirm against callers)
    accuracy: when > 0 and more than 15 candidate boundaries exist, only
        boundaries more than `accuracy` positions past the previous one are
        kept

    Returns None when x has a single distinct value or fewer than two
    candidate boundaries remain. Otherwise returns
    (information gain as float, [threshold, threshold]).

    `h` is defined outside this block; the comment at its call site
    describes it as an entropy.
    """
    x_unique = list(np.unique(x))
    if len(x_unique) == 1:
        return None
    indices = x.argsort()  # sort numeric attribute
    x, y = x[indices], y[indices]  # save sorted features with sorted labels

    # Initially every sample sits on the right side of the split.
    right_dist = np.bincount(y)
    # Extra label one past the largest class; concatenated to each slice
    # below so np.bincount always returns enough entries to be indexed by
    # class_indices.
    dummy_class = np.array([len(right_dist)])
    class_indices = right_dist.nonzero()[0]
    right_dist = right_dist[class_indices]
    left_dist = np.zeros(len(class_indices))

    # Positions (in sorted order) where the label differs from its predecessor
    diffs = np.nonzero(y[:-1] != y[1:])[0] + 1
    if accuracy > 0:
        # Thinning applies only when there are more than 15 boundaries; note
        # the comprehension starts at index 1, so the first boundary is
        # dropped as well.
        diffs = np.array([diffs[i] for i in range(1, len(diffs)) if diffs[i] - diffs[i - 1] > accuracy],
                         dtype=np.int32) if len(diffs) > 15 else diffs
    # Consecutive (start, end) pairs: [0, d0), [d0, d1), ...
    intervals = np.array((np.concatenate(([0], diffs[:-1])), diffs)).T
    if len(diffs) < 2:
        return None

    max_ig, max_i, max_j = 0, 0, 0
    prior_h = h(right_dist)  # calculate prior entropy

    for i, j in intervals:
        # Move the label counts of samples i..j-1 from the right side to the left.
        dist = np.bincount(np.concatenate((dummy_class, y[i:j])))[class_indices]
        left_dist += dist
        right_dist -= dist
        # Fraction of samples on each side, used as weights
        coef = np.true_divide((np.sum(left_dist), np.sum(right_dist)), len(y))
        ig = prior_h - np.dot(coef, [h(left_dist[left_dist.nonzero()]), h(right_dist[right_dist.nonzero()])])
        if ig > max_ig:
            max_ig, max_i, max_j = ig, i, j

    if x[max_i] == x[max_j]:
        # Both ends of the best interval share one feature value, so the
        # threshold is the midpoint between it and a neighbouring distinct value.
        ind = x_unique.index(x[max_i])
        mean = np.float32(np.mean((x_unique[1 if ind == 0 else ind - 1], x_unique[ind])))
    else:
        mean = np.float32(np.mean((x[max_i], x[max_j])))

    return float(max_ig), [mean, mean] 
Example #24
Source File: RegNet2020.py    From Pytorch-Networks with MIT License 5 votes vote down vote up
def generate_regnet(w_a, w_0, w_m, d, q=8):
    """Derive per-block widths from RegNet parameters.

    A linear width ramp ``w_0 + w_a * i`` for i in 0..d-1 is snapped to
    powers of `w_m` times `w_0`, then rounded to multiples of `q`.

    Returns (ws, num_stages, max_stage, ws_cont): the quantized widths as a
    list of ints, the number of distinct widths, the largest exponent plus
    one, and the unquantized ramp as a list.
    """
    assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
    continuous_widths = np.arange(d) * w_a + w_0
    exponents = np.round(np.log(continuous_widths / w_0) / np.log(w_m))
    snapped = w_0 * np.power(w_m, exponents)
    quantized = np.round(snapped / q) * q

    num_stages = len(np.unique(quantized))
    max_stage = exponents.max() + 1
    return (quantized.astype(int).tolist(), num_stages, max_stage,
            continuous_widths.tolist())
Example #25
Source File: measures.py    From discomll with Apache License 2.0 5 votes vote down vote up
def nominal_splits(x, y, x_vals, y_dist, separate_max):
    """
    Order nominal values for a binary split using the heuristic of (1).

    The heuristic is defined for binary classes; it is extended to multiple
    classes by comparing one reference label against the rest. The reference
    is the label with the fewest samples, or the one with the most when
    separate_max is true. Nominal values are then ordered by the estimated
    probability of the reference label, highest first.

    x: numpy array - nominal feature
    y: numpy array - label
    x_vals: numpy array - unique nominal values of x
    y_dist: dictionary - distribution of labels
    separate_max: bool - use the most frequent label as reference

    Returns the per-value label distributions and the nominal values, both
    in that order.

    Reference:
    (1) Classification and Regression Trees by Breiman, Friedman, Olshen, and Stone, pages 101- 102.
    """
    pick_reference = max if separate_max else min
    y_val = pick_reference(y_dist, key=y_dist.get)

    # prior distribution of the reference label
    prior = y_dist[y_val] / float(len(y))

    # label distribution at every unique nominal value
    splits = [x_val for x_val in x_vals]
    dist = [Counter(y[x == x_val]) for x_val in splits]

    values = []
    for counts in dist:
        suma = sum([prior * counts[y_key] for y_key in y_dist.keys()])
        # estimated probability of the reference label at this value
        values.append(prior * counts[y_val] / float(suma))

    # highest probability first
    indices = np.array(values).argsort()[::-1]
    return np.array(dist)[indices], np.array(splits)[indices].tolist()
Example #26
Source File: k_medoids.py    From discomll with Apache License 2.0 5 votes vote down vote up
def fit(sim_mat, D_len, cidx):
    """
    Run three rounds of medoid reassignment and keep the lowest-energy round.

    sim_mat: mapping from sample id to a mapping of other ids to values;
        missing entries are read as 0 via .get(idx, 0)
    D_len: int - sample ids are taken from range(D_len); ids absent from
        sim_mat are skipped
    cidx: sequence - initial medoid ids

    Returns (inds_min, cidx_min): for every considered sample, the position
    in the round's medoid list with the smallest value, and the medoids
    selected in that round.

    NOTE(review): the previous docstring described parameters `D` and `k`
    and a maximization; neither parameter exists and the code below
    minimizes throughout.
    """

    min_energy = np.inf
    for j in range(3):  # fixed number of rounds; j itself is not used
        # for each sample present in sim_mat, position (within cidx) of the
        # medoid with the smallest value
        inds = [np.argmin([sim_mat[idy].get(idx, 0) for idx in cidx]) for idy in range(D_len) if idy in sim_mat]

        cidx = []
        energy = 0  # current energy
        for i in np.unique(inds):
            # `inds` is a list; comparing it with a numpy scalar is elementwise
            indsi = np.where(inds == i)[0]  # find indices for every cluster

            # NOTE(review): min_value starts at 0, so a member replaces the
            # default (first member) only when its summed value is negative
            # - confirm this is intended.
            minind, min_value = 0, 0
            for index, idy in enumerate(indsi):
                # NOTE(review): indsi holds positions within `inds`, which
                # equal sample ids only if no id was skipped above - verify.
                if idy in sim_mat:
                    # value = sum([sim_mat[idy].get(idx,0) for idx in indsi])
                    value = 0
                    for idx in indsi:
                        value += sim_mat[idy].get(idx, 0)
                    if value < min_value:
                        minind, min_value = index, value
            energy += min_value
            cidx.append(indsi[minind])  # new centers

        # keep the assignment and medoids of the best round so far
        if energy < min_energy:
            min_energy, inds_min, cidx_min = energy, inds, cidx

    return inds_min, cidx_min  # cluster for every instance, medoids indices 
Example #27
Source File: test_suba.py    From libTLDA with MIT License 5 votes vote down vote up
def test_predict_semi():
    """Predictions of the semi-supervised classifier use only known labels."""
    X = rnd.randn(10, 2)
    y = np.repeat([0., 1.], 5)
    Z = rnd.randn(10, 2) + 1
    u = np.array([[0, 0], [9, 1]])

    clf = SemiSubspaceAlignedClassifier()
    clf.fit(X, y, Z, u)
    u_pred = clf.predict(Z)

    # No predicted value may fall outside the training label set.
    unexpected = np.setdiff1d(np.unique(u_pred), np.unique(y))
    assert len(unexpected) == 0
Example #28
Source File: test_random.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 5 votes vote down vote up
def test_unique_zipfian_generator():
    """Check the unique Zipfian sampler; runs only on a CPU context."""
    ctx = mx.context.current_context()
    if ctx.device_type != 'cpu':
        return

    num_sampled = 8192
    range_max = 793472
    batch_size = 4
    sampler = mx.nd._internal._sample_unique_zipfian
    classes, num_trials = sampler(range_max, shape=(batch_size, num_sampled))
    for row in range(batch_size):
        num_trial = num_trials[row].asscalar()
        # test uniqueness
        distinct = np.unique(classes[row].asnumpy())
        assert distinct.size == num_sampled
        # test num trials. reference count obtained from pytorch implementation
        assert num_trial > 14500
        assert num_trial < 17000
Example #29
Source File: metric.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 5 votes vote down vote up
def update_binary_stats(self, label, pred):
        """
        Add the confusion counts of one (label, pred) pair to the running totals.

        Parameters
        ----------
        label : `NDArray`
            The labels of the data.

        pred : `NDArray`
            Predicted values; the predicted class is the argmax along axis 1.

        Raises
        ------
        ValueError
            If `label` contains more than two distinct values.
        """
        pred = pred.asnumpy()
        label = label.asnumpy().astype('int32')
        pred_label = numpy.argmax(pred, axis=1)

        check_label_shapes(label, pred)
        n_distinct = len(numpy.unique(label))
        if n_distinct > 2:
            raise ValueError("%s currently only supports binary classification."
                             % self.__class__.__name__)

        # Class 1 is the positive class for both predictions and labels.
        pred_true = (pred_label == 1)
        label_true = (label == 1)
        pred_false = 1 - pred_true
        label_false = 1 - label_true

        self.true_positives += (pred_true * label_true).sum()
        self.false_positives += (pred_true * label_false).sum()
        self.false_negatives += (pred_false * label_true).sum()
        self.true_negatives += (pred_false * label_false).sum()
Example #30
Source File: test_iw.py    From libTLDA with MIT License 5 votes vote down vote up
def test_predict():
    """Predictions of the importance-weighted classifier use only known labels."""
    X = rnd.randn(10, 2)
    y = np.repeat([-1., 1.], 5)
    Z = rnd.randn(10, 2) + 1

    clf = ImportanceWeightedClassifier()
    clf.fit(X, y, Z)
    u_pred = clf.predict(Z)

    # No predicted value may fall outside the training label set.
    unexpected = np.setdiff1d(np.unique(u_pred), np.unique(y))
    assert len(unexpected) == 0