Python numpy.unique() Examples

The following are code examples showing how to use numpy.unique(). They are extracted from open-source Python projects.
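
For reference, numpy.unique() returns the sorted unique values of an array; optional flags additionally return the indices of first occurrences, the inverse mapping back to the input, and per-value counts. A minimal standalone demonstration (not taken from any of the projects below):

import numpy as np

a = np.array([3, 1, 2, 3, 1, 3])
values = np.unique(a)                                 # array([1, 2, 3])
values, index = np.unique(a, return_index=True)       # first occurrences: array([1, 2, 0])
values, inverse = np.unique(a, return_inverse=True)   # values[inverse] rebuilds a
values, counts = np.unique(a, return_counts=True)     # counts: array([2, 1, 3])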

Example 1
Project: j3dview   Author: blank63   File: shp1.py    (MIT License)
def gl_init(self,array_table):
        self.gl_hide = False

        self.gl_vertex_array = gl.VertexArray()
        glBindVertexArray(self.gl_vertex_array)

        self.gl_vertex_buffer = gl.Buffer()
        glBindBuffer(GL_ARRAY_BUFFER,self.gl_vertex_buffer)

        self.gl_element_count = 3*gl_count_triangles(self)
        self.gl_element_buffer = gl.Buffer()
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,self.gl_element_buffer)

        vertex_type =  numpy.dtype([array_table[attribute].field() for attribute in self.attributes])
        vertex_count = sum(len(primitive.vertices) for primitive in self.primitives)
        vertex_array = numpy.empty(vertex_count,vertex_type)

        for attribute in self.attributes:
            array_table[attribute].load(self,vertex_array)

        vertex_array,element_map = numpy.unique(vertex_array,return_inverse=True)
        element_array = gl_create_element_array(self,element_map,self.gl_element_count)

        glBufferData(GL_ARRAY_BUFFER,vertex_array.nbytes,vertex_array,GL_STATIC_DRAW)
        glBufferData(GL_ELEMENT_ARRAY_BUFFER,element_array.nbytes,element_array,GL_STATIC_DRAW) 
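
The numpy.unique call above is the standard vertex-deduplication idiom: with return_inverse=True it collapses repeated vertex records and yields the index map used to fill the element (index) buffer. A minimal sketch of the same pattern without the OpenGL plumbing, using hypothetical data:

import numpy as np

# three vertices, the first one repeated
vertices = np.array([(0.0, 0.0), (1.0, 0.0), (0.0, 0.0)],
                    dtype=[('x', np.float32), ('y', np.float32)])
unique_vertices, element_map = np.unique(vertices, return_inverse=True)
# unique_vertices has 2 entries; element_map == [0, 1, 0], and
# unique_vertices[element_map] reproduces the original vertex array
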
Example 2
Project: pybot   Author: spillai   File: recognition_utils.py    (license)
def __init__(self, filename, target_map, classifier='svm'): 
        
        self.seed_ = 0
        self.filename_ = filename
        self.target_map_ = target_map
        self.target_ids_ = np.unique(list(target_map.keys())).astype(np.int32)  # list() so this also works on Python 3
        self.epoch_no_ = 0
        self.st_time_ = time.time()

        # Setup classifier
        print('-------------------------------')        
        print('====> Building Classifier, setting class weights') 
        if classifier == 'svm': 
            self.clf_hyparams_ = {'C':[0.01, 0.1, 1.0, 10.0, 100.0], 'class_weight': ['balanced']}
            self.clf_base_ = LinearSVC(random_state=self.seed_)
        elif classifier == 'sgd': 
            self.clf_hyparams_ = {'alpha':[0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], 'class_weight':['auto']} # 'loss':['hinge'], 
            self.clf_ = SGDClassifier(loss='log', penalty='l2', shuffle=False, random_state=self.seed_, 
                                      warm_start=True, n_jobs=-1, n_iter=1, verbose=4)
        else: 
            raise Exception('Unknown classifier type %s. Choose from [sgd, svm, gradient-boosting, extra-trees]' 
                            % classifier) 
Example 3
Project: rca-evaluation   Author: sieve-microservices   File: cluster.py    (license)
def silhouette_score(series, clusters):
    distances = np.zeros((series.shape[0], series.shape[0]))
    for idx_a, metric_a in enumerate(series):
        for idx_b, metric_b in enumerate(series):
            distances[idx_a, idx_b] = _sbd(metric_a, metric_b)[0]
    labels = np.zeros(series.shape[0])
    for i, (cluster, indices) in enumerate(clusters):
        for index in indices:
            labels[index] = i

    # the silhouette score is only defined if there are at least 2 clusters
    # and fewer clusters than samples
    if len(np.unique(labels)) == 1 or (len(np.unique(labels)) >= distances.shape[0]):
        return labels, -1
    else:
        return labels, _silhouette_score(distances, labels, metric='precomputed') 
Example 4
Project: pytorch-semseg   Author: meetshah1995   File: ade20k_loader.py    (MIT License)
def transform(self, img, lbl):
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # imresize rescales the image to the 0-255 range, so divide by 255.0
        img = img.astype(float) / 255.0
        # NHWC -> NCHW
        img = img.transpose(2, 0, 1)

        lbl = self.encode_segmap(lbl)
        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)
        assert(np.all(classes == np.unique(lbl)))

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()
        return img, lbl 
Example 5
Project: cellranger   Author: 10XGenomics   File: pca.py    (license)
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)

    quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
    quantiles = np.append(quantiles, mat_mean.max())

    # merge bins with no difference in value
    quantiles = np.unique(quantiles)

    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp

    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)

    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins-1] # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)

    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins-1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm 
Example 6
Project: Python-Machine-Learning-By-Example   Author: PacktPublishing   File: 1decision_tree_submit.py    (MIT License)
def get_best_split(X, y, criterion):
    """ Obtain the best splitting point and resulting children for the data set X, y
    Args:
        X, y (numpy.ndarray, data set)
        criterion (gini or entropy)
    Returns:
        dict {index: index of the feature, value: feature value, children: left and right children}
    """
    best_index, best_value, best_score, children = None, None, 1, None
    for index in range(len(X[0])):
        for value in np.unique(X[:, index]):  # np.unique already returns sorted values
            groups = split_node(X, y, index, value)
            impurity = weighted_impurity([groups[0][1], groups[1][1]], criterion)
            if impurity < best_score:
                best_index, best_value, best_score, children = index, value, impurity, groups
    return {'index': best_index, 'value': best_value, 'children': children} 
Example 7
Project: tissue_analysis   Author: VirtualPlants   File: spatial_image_analysis.py    (license)
def consideronlylabels(self, list2consider, verbose = False):
        """
        Add labels to the ignoredlabels list (set) and update the self._labels cache.
        """
        if isinstance(list2consider, int):
            list2consider = [list2consider]

        toignore = set(np.unique(self.image))-set(list2consider)
        integers = np.vectorize(lambda x : int(x))
        toignore = integers(list(toignore)).tolist()


        if verbose: print 'Adding labels', toignore,'to the list of labels to ignore...'
        self._ignoredlabels.update(toignore)
        if verbose: print 'Updating labels list...'
        self._labels = self.__labels() 
Example 8
Project: NumpyDL   Author: oujago   File: mlp-digits.py    (license)
def main(max_iter):
    # prepare
    npdl.utils.random.set_seed(1234)

    # data
    digits = load_digits()

    X_train = digits.data
    X_train /= np.max(X_train)

    Y_train = digits.target
    n_classes = np.unique(Y_train).size

    # model
    model = npdl.model.Model()
    model.add(npdl.layers.Dense(n_out=500, n_in=64, activation=npdl.activations.ReLU()))
    model.add(npdl.layers.Dense(n_out=n_classes, activation=npdl.activations.Softmax()))
    model.compile(loss=npdl.objectives.SCCE(), optimizer=npdl.optimizers.SGD(lr=0.005))

    # train
    model.fit(X_train, npdl.utils.data.one_hot(Y_train), max_iter=max_iter, validation_split=0.1) 
Example 9
Project: segmentation_DLMI   Author: imatge-upc   File: sampling.py    (license)
def get_weighted_mask(self, image_shape, mask_shape,ROI_mask=None, labels_mask=None):

        if labels_mask is None:
            raise ValueError('SamplingScheme error: please specify a labels_mask for this sampling scheme')
        print(np.unique(labels_mask))
        mask_boundaries = self.get_mask_boundaries(image_shape, mask_shape,ROI_mask)


        final_mask = np.zeros((self.n_categories,) + labels_mask.shape, dtype="int16")
        for index_cat in range(self.n_categories):
            final_mask[index_cat] = (labels_mask == index_cat) * mask_boundaries  # elementwise product of category mask and boundary mask

        final_mask = 1.0 * final_mask / np.reshape(np.sum(np.reshape(final_mask,(self.n_categories,-1)),axis=1),(self.n_categories,)+(1,)*len(image_shape))

        print(np.sum(np.reshape(final_mask,(self.n_categories,-1)),axis=1))
        return final_mask 
Example 10
Project: NeoAnalysis   Author: neoanalysis   File: neuralynxio.py    (license)
def get_channel_id_by_file_name(self, filename):
        """
        Check the parameters of the NCS, NSE and NTT files for the given
        filename and return the channel_id if the result is consistent
        :param filename:
        :return:
        """
        channel_ids = []
        channel_ids += [k for k in self.parameters_ncs if
                        self.parameters_ncs[k]['filename'] == filename]
        channel_ids += [k for k in self.parameters_nse if
                        self.parameters_nse[k]['filename'] == filename]
        channel_ids += [k for k in self.parameters_ntt if
                        self.parameters_ntt[k]['filename'] == filename]
        if len(np.unique(np.asarray(channel_ids))) == 1:
            return channel_ids[0]
        elif len(channel_ids) > 1:
            raise ValueError(
                    'Ambiguous channel ids detected. Filename %s is associated'
                    ' to different channels of NCS and NSE and NTT %s'
                    '' % (filename, channel_ids))
        else:  # if filename was not detected
            return None 
Example 11
Project: NeoAnalysis   Author: neoanalysis   File: blackrockio.py    (license)
def __read_unit(self, unit_id, channel_idx):
        """
        Creates unit with unit id for given channel id.
        """
        # define a name for spiketrain
        # (unique identifier: 1000 * elid + unit_nb)
        name = "Unit {0}".format(1000 * channel_idx + unit_id)
        # define description for spiketrain
        desc = 'Unit from channel: {0}, id: {1}'.format(
            channel_idx, self.__get_unit_classification(unit_id))

        un = Unit(
            name=name,
            description=desc,
            file_origin='.'.join([self._filenames['nev'], 'nev']))

        # add additional annotations
        un.annotate(ch_idx=int(channel_idx))
        un.annotate(unit_id=int(unit_id))

        return un 
Example 12
Project: NeoAnalysis   Author: neoanalysis   File: spikesorting.py    (license)
def __draw_pk2(self):
        self.__cleanPk2()
        if self.units is not None:
            unique_units = np.unique(self.units)
            unique_units = unique_units.tolist()
            pca_1,pca_2 = self.PCAusedList.currentText().split("-")
            pca_1 = int(pca_1) - 1  # plain int(); the np.int alias was removed in recent NumPy
            pca_2 = int(pca_2) - 1
            if self.wavePCAs[0].shape[0]>2:
                xs = self.wavePCAs[:,pca_1]
                ys = self.wavePCAs[:,pca_2]
                self.PcaScatterItem = []
                seg_num = 5000
                for i,ite_unit in enumerate(unique_units):
                    mask = self.units==ite_unit
                    temp_xs = xs[mask]
                    temp_ys = ys[mask]
                    segs = int(ceil(temp_xs.shape[0]/float(seg_num)))
                    for j in range(segs):
                        temp_xs_j = temp_xs[j*seg_num:(j+1)*seg_num]
                        temp_ys_j = temp_ys[j*seg_num:(j+1)*seg_num]
                        self.PcaScatterItem.append(pg.ScatterPlotItem(temp_xs_j,temp_ys_j,pen=self.colors[ite_unit],brush=self.colors[ite_unit],size=3,symbol="o"))
                for i in range(len(self.PcaScatterItem)):
                    self.pk2.addItem(self.PcaScatterItem[i]) 
Example 13
Project: spikefuel   Author: duguyue100   File: dvsproc.py    (MIT License)
def cal_event_count(timestamps):
    """Calculate event count based on timestamps.

    Parameters
    ----------
    timestamps : numpy.ndarray
        timestamps array in 1D array

    Returns
    -------
    event_arr : numpy.ndarray
        2-row array: the first row contains the unique timestamps, the
        second row the corresponding event count at each timestamp
    """
    event_ts, event_count = np.unique(timestamps, return_counts=True)

    return np.asarray((event_ts, event_count)) 
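
With return_counts=True, np.unique doubles as a histogram over discrete values, which is what the event counting above relies on. For example, with hypothetical timestamps:

import numpy as np

timestamps = np.array([10, 10, 12, 15, 15, 15])
event_ts, event_count = np.unique(timestamps, return_counts=True)
# event_ts    -> array([10, 12, 15])
# event_count -> array([2, 1, 3])
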
Example 14
Project: pscore_match   Author: kellieotto   File: match.py    (BSD 2-Clause "Simplified" License)
def recode_groups(groups, propensity):
    # Code groups as 0 and 1
    groups = (groups == groups.unique()[0])
    N = len(groups)
    N1 = groups[groups == 1].index
    N2 = groups[groups == 0].index
    g1 = propensity[groups == 1]
    g2 = propensity[groups == 0]
    # Check if treatment groups got flipped - the smaller should correspond to N1/g1
    if len(N1) > len(N2):
       N1, N2, g1, g2 = N2, N1, g2, g1
    return groups, N1, N2, g1, g2

################################################################################
############################# Base Matching Class ##############################
################################################################################ 
Example 15
Project: PyGPS   Author: gregstarr   File: gps.py    (license)
def minScalErr(stec,el,z,thisBias):
    """
    this determines the slope of the vTEC vs. Elevation line, which
    should be minimized in the minimum scalloping technique for
    receiver bias removal
    inputs:
        stec - time indexed Series of slant TEC values
        el - corresponding elevation values, also Series
        z - mapping function values to convert to vTEC from entire file, may
            contain nans, Series
        thisBias - the bias to be tested and minimized
    """

    intel=np.asarray(el[stec.index],int) # bin the elevation values into int
    sTEC=np.asarray(stec,float)
    zmap = z[stec.index]
    c=np.array([(i,np.average((sTEC[intel==i]-thisBias)
                              /zmap[intel==i])) for i in np.unique(intel) if i>30])

    return np.polyfit(c[:,0],c[:,1],1)[0] 
Example 16
Project: risk-slim   Author: ustunb   File: solution_classes.py    (BSD 3-Clause "New" or "Revised" License)
def filter_sort_unique(self, max_objval=float('Inf')):
        # filter
        if max_objval < float('inf'):
            good_idx = self.objvals <= max_objval
            self.objvals = self.objvals[good_idx]
            self.solutions = self.solutions[good_idx]

        if len(self.objvals) > 0:
            sort_idx = np.argsort(self.objvals)
            self.objvals = self.objvals[sort_idx]
            self.solutions = self.solutions[sort_idx]

            # unique
            b = np.ascontiguousarray(self.solutions).view(
                np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
            _, unique_idx = np.unique(b, return_index=True)
            self.objvals = self.objvals[unique_idx]
            self.solutions = self.solutions[unique_idx] 
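
The view-as-void trick above turns each solution row into a single opaque item so that np.unique deduplicates whole rows. Since NumPy 1.13 the same effect is available directly through the axis argument; a minimal sketch with hypothetical data:

import numpy as np

solutions = np.array([[1, 0, 1],
                      [0, 1, 1],
                      [1, 0, 1]])
unique_rows, unique_idx = np.unique(solutions, axis=0, return_index=True)
# unique_rows -> [[0, 1, 1], [1, 0, 1]]; unique_idx -> [1, 0]
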
Example 17
Project: AutoSleepScorerDev   Author: skjerns   File: keras_utils.py    (GNU General Public License v3.0)
def reset(self):
        """ Resets the state of the generator"""
        self.step = 0
        Y = np.argmax(self.Y,1)
        labels = np.unique(Y)
        idx = []
        smallest = len(Y)
        for i,label in enumerate(labels):
            where = np.where(Y==label)[0]
            if smallest > len(where): 
                self.slabel = i
                smallest = len(where)
            idx.append(where)
        self.idx = idx
        self.labels = labels
        self.n_per_class = int(self.batch_size // len(labels))
        self.n_batches = int(np.ceil((smallest//self.n_per_class)))+1
        self.update_probabilities() 
Example 18
Project: AutoSleepScorerDev   Author: skjerns   File: keras_utils.py    (GNU General Public License v3.0)
def __init__(self, X, Y, batch_size,cropsize=0, truncate=False, sequential=False,
                 random=True, val=False, class_weights=None):
        
        assert len(X) == len(Y), 'X and Y must be the same length {}!={}'.format(len(X),len(Y))
        if sequential: print('Using sequential mode')
        print ('starting normal generator')
        self.X = X
        self.Y = Y
        self.rnd_idx = np.arange(len(Y))
        self.Y_last_epoch = []
        self.val = val
        self.step = 0
        self.i = 0
        self.cropsize=cropsize
        self.truncate = truncate
        self.random = False if sequential or val else random
        self.batch_size = int(batch_size)
        self.sequential = sequential
        self.c_weights = class_weights if class_weights else dict(zip(np.unique(np.argmax(Y,1)),np.ones(len(np.argmax(Y,1)))))
        assert set(np.argmax(Y,1)) == set([int(x) for x in self.c_weights.keys()]), 'not all labels in class weights'
        self.n_batches = int(len(X)//batch_size if truncate else np.ceil(len(X)/batch_size))
        if self.random: self.randomize() 
Example 19
Project: AutoSleepScorerDev   Author: skjerns   File: keras_utils.py    (GNU General Public License v3.0)
def next_normal(self):
        x_batch = self.X[self.step*self.batch_size:(self.step+1)*self.batch_size]
        y_batch = self.Y[self.step*self.batch_size:(self.step+1)*self.batch_size]
        
        diff = len(x_batch[0]) - self.cropsize
        if self.cropsize!=0 and not self.val:
            start = np.random.choice(np.arange(0,diff+5,5), len(x_batch))
            x_batch = [x[start[i]:start[i]+self.cropsize,:] for i,x in enumerate(x_batch)]
        elif self.cropsize !=0 and self.val:
            x_batch = [x[diff//2:diff//2+self.cropsize] for i,x in enumerate(x_batch)]
            
        x_batch = np.array(x_batch, dtype=np.float32)
        y_batch = np.array(y_batch, dtype=np.int32)
        self.step+=1
        if self.val:
            self.Y_last_epoch.extend(y_batch)
            return x_batch # for validation generator, save the new y_labels
        else:
            # per-sample weights from the class-weight dict, returned so the
            # caller can feed them to Keras as sample weights
            weights = np.ones(len(y_batch))
            for t in np.unique(np.argmax(y_batch,1)):
                weights[np.argmax(y_batch,1)==t] = self.c_weights[t]
            return (x_batch, y_batch, weights)
Example 20
Project: PersonalizedMultitaskLearning   Author: mitmedialab   File: generic_wrapper.py    (license)
def get_preds_true_for_task(self,train_tasks, test_tasks, param_dict):
		t = param_dict['task_num']
		X = train_tasks[t]['X']
		y = train_tasks[t]['Y']

		test_X = test_tasks[t]['X']
		true_y = list(test_tasks[t]['Y'].flatten())

		if len(y)==0 or len(X)==0 or len(test_X) == 0 or len(true_y)==0:
			return None, None

		if self.cant_train_with_one_class and len(np.unique(y))==1:
			preds = list(np.unique(y)[0]*np.ones(len(true_y)))
		else:
			preds = self.train_and_predict_task(t, X, y, test_X, param_dict)

		return preds, true_y 
Example 21
Project: a-cadmci   Author: florez87   File: Utilities.py    (license)
def getClasses(labels):
        """
        Get unique values from a column of labels.
        
        Parameters
        ----------
        labels: array-like of shape = [number_samples] or [number_samples, number_outputs]
            The target values (class labels in classification).
        
        Return
        ----------
        classes: ndarray
            The sorted unique labels
        
        ids: ndarray
            For each label, the index of its value in `classes`; `classes[ids]`
            reconstructs `labels` (the inverse mapping, not first-occurrence indices).
        """
        uniques, ids = numpy.unique(labels, return_inverse=True)
        return uniques, ids 
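
As a quick check of the return_inverse semantics used here, a standalone example:

import numpy

labels = ['cat', 'dog', 'cat', 'bird']
uniques, ids = numpy.unique(labels, return_inverse=True)
# uniques -> ['bird', 'cat', 'dog']; ids -> [1, 2, 1, 0]
assert list(uniques[ids]) == labels
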
Example 22
Project: OptML   Author: johannespetrat   File: test_gridsearch_optimizer.py    (license)
def grid_spacing(self):
        interval = [1,10]
        p1 = Parameter('A', 'integer', lower=interval[0], upper=interval[1])
        p2 = Parameter('B', 'continuous', lower=interval[0], upper=interval[1])
        p3 = Parameter('C', 'categorical', possible_values=['Bla1', 'Bla2'])
        p4 = Parameter('D', 'boolean')
        grid_sizes = {'A': 5, 'B': 6}
        grid_search = GridSearchOptimizer(model, [p1, p2, p3, p4], clf_score, grid_sizes)
        grid = grid_search.grid
        for params in grid:
            self.assertIn(params['A'], range(*interval))
            self.assertTrue(params['B'] >= interval[0])  # assertTrue: these are boolean conditions, not membership tests
            self.assertTrue(params['B'] <= interval[1])
            self.assertIn(params['C'], ['Bla1', 'Bla2'])
            self.assertIn(params['D'], ['True', 'False'])
        lenA = len(np.unique([params['A'] for params in grid]))
        lenB = len(np.unique([params['B'] for params in grid]))
        lenC = len(np.unique([params['C'] for params in grid]))
        lenD = len(np.unique([params['D'] for params in grid]))
        self.assertTrue((lenA==grid_sizes['A']) or (lenA==grid_sizes['A']+1))
        self.assertTrue((lenB==grid_sizes['B']) or (lenB==grid_sizes['B']+1))
        self.assertTrue((lenC==grid_sizes['C']) or (lenC==grid_sizes['C']+1))
        self.assertTrue((lenD==grid_sizes['D']) or (lenD==grid_sizes['D']+1)) 
Example 23
Project: audio_scripts   Author: audiofilter   File: spectrogram_main.py    (license)
def logscale_spec(spec, sr=44100, factor=20.):
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale)).astype(int)  # cast to int so the bins can be used as slice indices
    
    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
        else:        
            newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)
    
    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]
    
    return newspec, freqs 
Example 24
Project: audio_scripts   Author: audiofilter   File: spectrogram.py    (license)
def logscale_spec(spec, sr=44100, factor=20.):
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins-1)/max(scale)
    scale = np.unique(np.round(scale)).astype(int)  # cast to int so the bins can be used as slice indices
    
    # create spectrogram with new freq bins
    newspec = np.complex128(np.zeros([timebins, len(scale)]))
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1)
        else:        
            newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1)
    
    # list center freq of bins
    allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
    freqs = []
    for i in range(0, len(scale)):
        if i == len(scale)-1:
            freqs += [np.mean(allfreqs[scale[i]:])]
        else:
            freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])]
    
    return newspec, freqs 
Example 25
Project: kmeans-service   Author: MAYHEM-Lab   File: sf_kmeans.py    (license)
def free_parameters(self, data):
        """
        Compute free parameters for the model fit using K-Means
        """
        K = np.unique(self.labels_).shape[0]  # number of clusters
        n, d = data.shape
        r = (K - 1) + (K * d)
        if self.metric == 'euclidean':
            r += 1  # one parameter for variance
        elif self.metric == 'mahalanobis':
            if self.covar_type == 'full' and self.covar_tied:
                r += (d * (d + 1) * 0.5)  # half of the elements (including diagonal) in the matrix
            if self.covar_type == 'full' and not self.covar_tied:
                r += (d * (d + 1) * 0.5 * K)  # half of the elements (including diagonal) in the matrix
            if self.covar_type == 'diag' and self.covar_tied:
                r += d  # diagonal elements of the matrix
            if self.covar_type == 'diag' and not self.covar_tied:
                r += (d * K)  # diagonal elements of the matrix
            if self.covar_type == 'spher' and self.covar_tied:
                r += 1  # all diagonal elements are equal
            if self.covar_type == 'spher' and not self.covar_tied:
                r += K  # all diagonal elements are equal
        return r 
Example 26
Project: sef   Author: passalis   File: custom_dr.py    (license)
def sim_target_supervised(target_data, target_labels, sigma, idx, target_params):
    cur_labels = target_labels[idx]
    N = cur_labels.shape[0]

    N_labels = len(np.unique(cur_labels))

    Gt, mask = np.zeros((N, N)), np.zeros((N, N))

    for i in range(N):
        for j in range(N):
            if cur_labels[i] == cur_labels[j]:
                Gt[i, j] = 0.8
                mask[i, j] = 1
            else:
                Gt[i, j] = 0.1
                mask[i, j] = 0.8 / (N_labels - 1)

    return np.float32(Gt), np.float32(mask) 
Example 27
Project: em_examples   Author: geoscixyz   File: DCIP_overburden_PseudoSection.py    (license)
def get_Surface_Potentials(mtrue, survey, src, field_obj):

    phi = field_obj['phi']
    CCLoc = mesh.gridCC
    XLoc = np.unique(mesh.gridCC[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]
    phiScale = 0.

    if(survey == "Pole-Dipole" or survey == "Pole-Pole"):
        refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
        # refPoint =  CCLoc[refInd]
        # refSurfaceInd = np.where(xSurface == refPoint[0])
        # phiScale = np.median(phiSurface)
        phiScale = phi[refInd]
        phiSurface = phiSurface - phiScale

    return XLoc, phiSurface, phiScale 
Example 28
Project: em_examples   Author: geoscixyz   File: sphereElectrostatic_example.py    (license)
def Plot_ChargesDensity(XYZ, sig0, sig1, R, E0, ax):

    xr, yr, zr = np.unique(XYZ[:, 0]), np.unique(XYZ[:, 1]), np.unique(XYZ[:, 2])
    xcirc = xr[np.abs(xr) <= R]

    Et, Ep, Es = get_ElectricField(XYZ, sig0, sig1, R, E0)
    rho = get_ChargesDensity(XYZ, sig0, sig1, R, Et, Ep)

    ax.set_xlim([xr.min(), xr.max()])
    ax.set_ylim([yr.min(), yr.max()])
    ax.set_aspect('equal')
    Cplot = ax.pcolor(xr, yr, rho.reshape(xr.size, yr.size))
    cb1 = plt.colorbar(Cplot, ax=ax)
    cb1.set_label(label='Charge Density ($C/m^2$)', size=ftsize_label)
    cb1.ax.tick_params(labelsize=ftsize_axis)
    ax.plot(xcirc, np.sqrt(R**2-xcirc**2), '--k', xcirc, -np.sqrt(R**2-xcirc**2), '--k')
    ax.set_ylabel('Y coordinate ($m$)', fontsize=ftsize_label)
    ax.set_xlabel('X coordinate ($m$)', fontsize=ftsize_label)
    ax.tick_params(labelsize=ftsize_axis)
    ax.set_title('Charges Density', fontsize=ftsize_title)

    return ax 
Example 29
Project: em_examples   Author: geoscixyz   File: DCWidget_Overburden_2_5D.py    (license)
def get_Surface_Potentials(mtrue, survey, src, field_obj):

    phi = field_obj['phi']
    CCLoc = mesh.gridCC
    XLoc = np.unique(mesh.gridCC[:, 0])
    surfaceInd, zsurfaceLoc = get_Surface(mtrue, XLoc)
    phiSurface = phi[surfaceInd]
    phiScale = 0.

    if(survey == "Pole-Dipole" or survey == "Pole-Pole"):
        refInd = Utils.closestPoints(mesh, [xmax+60., 0.], gridLoc='CC')
        # refPoint =  CCLoc[refInd]
        # refSurfaceInd = np.where(xSurface == refPoint[0])
        # phiScale = np.median(phiSurface)
        phiScale = phi[refInd]
        phiSurface = phiSurface - phiScale

    return XLoc, phiSurface, phiScale 
Example 30
Project: AutoML5   Author: djajetic   File: data_manager.py    (MIT License)
def getTypeProblem (self, solution_filename):
		''' Get the type of problem directly from the solution file (in case we do not have an info file) '''
		if 'task' not in self.info.keys():
			solution = np.array(data_converter.file_to_array(solution_filename))
			target_num = solution.shape[1]
			self.info['target_num']=target_num
			if target_num == 1: # if we have only one column
				solution = np.ravel(solution) # flatten
				nbr_unique_values = len(np.unique(solution))
				if nbr_unique_values < len(solution)/8:
					# Classification
					self.info['label_num'] = nbr_unique_values
					if nbr_unique_values == 2:
						self.info['task'] = 'binary.classification'
						self.info['target_type'] = 'Binary'
					else:
						self.info['task'] = 'multiclass.classification'
						self.info['target_type'] = 'Categorical'
				else:
					# Regression
					self.info['label_num'] = 0
					self.info['task'] = 'regression'
					self.info['target_type'] = 'Numerical'     
			else:
				# Multilabel or multiclass       
				self.info['label_num'] = target_num
				self.info['target_type'] = 'Binary' 
				if any(item > 1 for item in map(np.sum,solution.astype(int))):
					self.info['task'] = 'multilabel.classification'     
				else:
					self.info['task'] = 'multiclass.classification'        
		return self.info['task'] 
Example 31
Project: AutoML5   Author: djajetic   File: libscores.py    (MIT License)
def tiedrank(a):
    ''' Return the ranks (with base 1) of a list resolving ties by averaging.
     This works for numpy arrays.'''    
    m=len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i=a.argsort()
    sa=a[i]
    # Find unique values
    uval=np.unique(a)     
    # Test whether there are ties 
    R=np.arange(m, dtype=float)+1 # Ranks with base 1
    if len(uval)!=m:
        # Average the ranks for the ties 
        oldval=sa[0]
        newval=sa[0]
        k0=0
        for k in range(1,m):
            newval=sa[k]
            if newval==oldval:
                # moving average
                R[k0:k+1]=R[k-1]*(k-k0)/(k-k0+1)+R[k]/(k-k0+1)
            else:
                k0=k;
                oldval=newval
    # Invert the index
    S=np.empty(m)
    S[i]=R
    return S 
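
For reference, SciPy ships the same tie-averaged ranking; assuming SciPy is available, scipy.stats.rankdata reproduces tiedrank's output:

import numpy as np
from scipy.stats import rankdata

a = np.array([10., 20., 20., 30.])
print(rankdata(a, method='average'))  # [1.  2.5 2.5 4. ]
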
Example 32
Project: AutoML5   Author: djajetic   File: data_converter.py    (MIT License)
def binarization (array):
	''' Takes a binary-class datafile and turn the max value (positive class) into 1 and the min into 0'''
	array = np.array(array, dtype=float) # conversion needed to use np.inf after
	if len(np.unique(array)) > 2:
		raise ValueError ("The argument must be a binary-class datafile. {} classes detected".format(len(np.unique(array))))
	
	# manipulation which aims at avoiding errors in data with, for example, classes '1' and '2'
	array[array == np.amax(array)] = np.inf
	array[array == np.amin(array)] = 0
	array[array == np.inf] = 1
	return np.array(array, dtype=int) 
Example 33
Project: IntroToDeepLearning   Author: robb-brown   File: input_data.py    (MIT License)
def __init__(self, images, labels, fake_data=False):
    if fake_data:
      self._num_examples = 10000
    else:
      assert images.shape[0] == labels.shape[0], (
          "images.shape: %s labels.shape: %s" % (images.shape,
                                                 labels.shape))
      self._num_examples = images.shape[0]

      # Convert shape from [num examples, rows, columns, depth]
      # to [num examples, rows*columns] (assuming depth == 1)
      self.imageShape = images.shape[1:]
      self.imageChannels = self.imageShape[2]

      images = images.reshape(images.shape[0],
                              images.shape[1] * images.shape[2] * images.shape[3])
      # Convert from [0, 255] -> [0.0, 1.0].
      images = images.astype(numpy.float32)
      images = numpy.multiply(images, 1.0 / 255.0)
    self._images = images
    self._labels = labels
    try:
      if len(numpy.shape(self._labels)) == 1:
        self._labels = dense_to_one_hot(self._labels,len(numpy.unique(self._labels)))
    except:
      traceback.print_exc()
    self._epochs_completed = 0
    self._index_in_epoch = 0 
Example 34
Project: rca-evaluation   Author: sieve-microservices   File: cluster.py    (license)
def cluster_service(path, service, cluster_size, prev_metadata=None):

    filename = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)

    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
        # adjust cluster_size if an initial assignment has been found
        if initial_idx is not None:
            cluster_size = len(np.unique(initial_idx))

    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    if os.path.exists(prefix + "_1.png"):
        print("skip " + prefix)
        return (None, None)

    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)

    # protect the write access to the metadata file
    metadata_lock.acquire()
    with metadata.update(path) as data:
        for srv in data["services"]:
            if srv["name"] == service["name"]:
                if "clusters" not in srv:
                    srv["clusters"] = {}
                d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
                srv["clusters"][cluster_size] = d
    metadata_lock.release()

    return (service["name"], cluster_size) 
Example 35
Project: spyking-circus   Author: spyking-circus   File: plot.py    (license)
def view_waveforms_clusters(data, halo, threshold, templates, amps_lim, n_curves=200, save=False):
    
    nb_templates = templates.shape[1]
    n_panels     = int(numpy.ceil(numpy.sqrt(nb_templates)))  # int for subplot indexing
    mask         = numpy.where(halo > -1)[0]
    clust_idx    = numpy.unique(halo[mask])
    fig          = pylab.figure()    
    square       = True
    center       = (len(data[0]) - 1)//2  # center sample index of the waveform
    for count, i in enumerate(xrange(nb_templates)):
        if square:
            pylab.subplot(n_panels, n_panels, count + 1)
            if (numpy.mod(count, n_panels) != 0):
                pylab.setp(pylab.gca(), yticks=[])
            if (count < n_panels*(n_panels - 1)):
                pylab.setp(pylab.gca(), xticks=[])
        
        subcurves = numpy.where(halo == clust_idx[count])[0]
        for k in numpy.random.permutation(subcurves)[:n_curves]:
            pylab.plot(data[k], '0.5')
        
        pylab.plot(templates[:, count], 'r')        
        pylab.plot(amps_lim[count][0]*templates[:, count], 'b', alpha=0.5)
        pylab.plot(amps_lim[count][1]*templates[:, count], 'b', alpha=0.5)
        
        xmin, xmax = pylab.xlim()
        pylab.plot([xmin, xmax], [-threshold, -threshold], 'k--')
        pylab.plot([xmin, xmax], [threshold, threshold], 'k--')
        #pylab.ylim(-1.5*threshold, 1.5*threshold)
        ymin, ymax = pylab.ylim()
        pylab.plot([center, center], [ymin, ymax], 'k--')
        pylab.title('Cluster %d' %i)

    if nb_templates > 0:
        pylab.tight_layout()
    if save:
        pylab.savefig(os.path.join(save[0], 'waveforms_%s' %save[1]))
        pylab.close()
    else:
        pylab.show()
    del fig 
Example 36
Project: spyking-circus   Author: spyking-circus   File: utils.py    (license)
def check_consistent_length(*arrays):
    """Check that all arrays have consistent first dimensions.
    Checks whether all objects in arrays have the same shape or length.
    Parameters
    ----------
    *arrays : list or tuple of input objects.
        Objects that will be checked for consistent length.
    """

    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
    if len(uniques) > 1:
        raise ValueError("Found arrays with inconsistent numbers of samples: "
                         "%s" % str(uniques)) 
Example 37
Project: pytorch-semseg   Author: meetshah1995   File: mit_sceneparsing_benchmark_loader.py    (MIT License)
def transform(self, img, lbl):
        """transform

        :param img:
        :param lbl:
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # imresize rescales the image to the 0-255 range, so divide by 255.0
        img = img.astype(float) / 255.0
        # NHWC -> NCHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)

        if not np.all(classes == np.unique(lbl)):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(np.unique(lbl) < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl 
Example 38
Project: pytorch-semseg   Author: meetshah1995   File: cityscapes_loader.py    (MIT License)
def transform(self, img, lbl):
        """transform

        :param img:
        :param lbl:
        """
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= self.mean
        img = m.imresize(img, (self.img_size[0], self.img_size[1]))
        # imresize rescales the image to the 0-255 range, so divide by 255.0
        img = img.astype(float) / 255.0
        # NHWC -> NCHW
        img = img.transpose(2, 0, 1)

        classes = np.unique(lbl)
        lbl = lbl.astype(float)
        lbl = m.imresize(lbl, (self.img_size[0], self.img_size[1]), 'nearest', mode='F')
        lbl = lbl.astype(int)

        if not np.all(classes == np.unique(lbl)):
            print("WARN: resizing labels yielded fewer classes")

        if not np.all(np.unique(lbl) < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        img = torch.from_numpy(img).float()
        lbl = torch.from_numpy(lbl).long()

        return img, lbl 
Example 39
Project: MKLMM   Author: omerwe   File: mklmm.py    (BSD 2-Clause "Simplified" License)
def fit(self, X, C, y, regions, kernelType, reml=True, maxiter=100):
	
		#construct a list of kernel names (one for each region) 
		if (kernelType == 'adapt'): kernelNames = self.buildKernelAdapt(X, C, y, regions, reml, maxiter)
		else: kernelNames = [kernelType] * len(regions)			
		
		#perform optimization
		kernelObj, hyp_kernels, sig2e, fixedEffects = self.optimize(X, C, y, kernelNames, regions, reml, maxiter)
		
		#compute posterior distribution
		Ktraintrain = kernelObj.getTrainKernel(hyp_kernels)
		post = self.infExact_scipy_post(Ktraintrain, C, y, sig2e, fixedEffects)
		
		#fix intercept if phenotype is binary
		if (len(np.unique(y)) == 2):			
			controls = (y<y.mean())
			cases = ~controls
			meanVec = C.dot(fixedEffects)
			mu, var = self.getPosteriorMeanAndVar(np.diag(Ktraintrain), Ktraintrain, post, meanVec)										
			fixedEffects[0] -= optimize.minimize_scalar(self.getNegLL, args=(mu, np.sqrt(sig2e+var), controls, cases), method='brent').x				
		
		#construct trainObj
		trainObj = dict([])
		trainObj['sig2e'] = sig2e
		trainObj['hyp_kernels'] = hyp_kernels
		trainObj['fixedEffects'] = fixedEffects		
		trainObj['kernelNames'] = kernelNames
		
		return trainObj 
Example 40
Project: kaggle_dsb2017   Author: astoc   File: unet_d8g_222f.py    (MIT License)
def load_scan(path):
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    #slices.sort(key = lambda x: int(x.InstanceNumber))
       
    acquisitions = [x.AcquisitionNumber for x in slices]
    
    vals, counts = np.unique(acquisitions, return_counts=True)
    vals = vals[::-1]  # reverse order so the later acquisitions come first (np.unique returns values in ascending order)
    counts = counts[::-1]
    
    ## take the acquisition that has more entries; if the counts are identical, take the later entry
    acq_val_sel = vals[np.argmax(counts)]
  

    ##acquisitions = sorted(np.unique(acquisitions), reverse=True)
    
    if len(vals) > 1:
        print ("WARNING ##########: MULTIPLE acquisitions & counts, acq_val_sel, path: ", vals, counts, acq_val_sel, path)
    slices2= [x for x in slices if x.AcquisitionNumber == acq_val_sel]
    
    slices = slices2
    
   
    ## one path may include 2 acquisitions (2 sets); take the latter acquisition only, which typically is better than the previous ones
    ## example: '../input/stage1/b8bb02d229361a623a4dc57aa0e5c485'
    
    #slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))  # from v 8, BUG should be float
    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))  # from v 9
    try:
        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
    except:
        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
        
    for s in slices:
        s.SliceThickness = slice_thickness
        
    return slices 
Example 41
Project: kaggle_dsb2017   Author: astoc   File: lungs_var3_d8g_222f.py    (MIT License)
def largest_label_volume(im, bg=-1):
    vals, counts = np.unique(im, return_counts=True)

    counts = counts[vals != bg]
    vals = vals[vals != bg]

    if len(counts) > 0:
        return vals[np.argmax(counts)]
    else:
        return None

#image=sample_image 
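
largest_label_volume is a mode-finder: np.unique with return_counts=True plus argmax picks the most frequent non-background label. For example, with a hypothetical labeled image:

import numpy as np

im = np.array([[0, 1, 1],
               [2, 1, 0]])
vals, counts = np.unique(im, return_counts=True)
# vals -> array([0, 1, 2]); counts -> array([2, 3, 1])
# with bg=0 excluded, the largest label is 1
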
Example 42
Project: cellranger   Author: 10XGenomics   File: molecule_counter.py    (license)
def get_chunks_by_gem_group(self):
        """ Return exactly one chunk per gem group."""
        gem_group_arr = self.get_column('gem_group')
        # verify gem groups are sorted
        assert np.all(np.diff(gem_group_arr)>=0)
        unique_ggs = np.unique(gem_group_arr)
        gg_key = lambda i: gem_group_arr[i]
        chunk_iter = self.get_chunks_from_partition(unique_ggs, gg_key)
        for (gg, chunk) in zip(unique_ggs, chunk_iter):
            yield (gg, chunk[0], chunk[1]) 
Example 43
Project: cellranger   Author: 10XGenomics   File: stats.py    (license)
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1

    print 'RPPU subsample rate: %0.4f' % subsample_rate

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print 'New N50: %d:' % new_n50

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1,1)))
    kmeans.predict(log_reads.reshape((-1,1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold 
Example 44
Project: cellranger   Author: 10XGenomics   File: hdf5.py    (license)
def append_data_column(ds, column):

    # Extend the dataset to fit the new data
    new_count = column.shape[0]
    existing_count = ds.shape[0]
    ds.resize((existing_count + new_count,))

    levels = get_levels(ds)

    if levels is not None:
        # update levels if we have new unique values
        if type(column.values) == p.Categorical:
            added_levels = set(column.values.categories) - set(levels)
        elif len(column) == 0:
            # Workaround for bug in pandas - get a crash in .unique() for an empty series
            added_levels = set([])
        else:
            added_levels = set(column.unique()) - set(levels)

        new_levels = list(levels)
        new_levels.extend(added_levels)

        # Check if the new categorical column has more levels
        # than the current bit width supports.
        # If so, rewrite the existing column data w/ more bits
        if len(new_levels) > np.iinfo(ds.dtype).max:
            new_dtype = pick_cat_dtype(len(new_levels))
            ds = widen_cat_column(ds, new_dtype)

        new_levels = np.array(new_levels, dtype=object)  # the np.object alias was removed in recent NumPy
        new_data = make_index_array(new_levels, column.values, ds.dtype)

        clear_levels(ds)
        create_levels(ds, new_levels)
    else:
        new_data = column

    # Append new data
    ds[existing_count:(existing_count + new_count)] = new_data 
Example 45
Project: FCN_train   Author: 315386775   File: colorlabel.py    (license)
def _label2rgb_avg(label_field, image, bg_label=0, bg_color=(0, 0, 0)):
    """Visualise each segment in `label_field` with its mean color in `image`.

    Parameters
    ----------
    label_field : array of int
        A segmentation of an image.
    image : array, shape ``label_field.shape + (3,)``
        A color image of the same spatial shape as `label_field`.
    bg_label : int, optional
        A value in `label_field` to be treated as background.
    bg_color : 3-tuple of int, optional
        The color for the background label

    Returns
    -------
    out : array, same shape and type as `image`
        The output visualization.
    """
    out = np.zeros_like(image)
    labels = np.unique(label_field)
    bg = (labels == bg_label)
    if bg.any():
        labels = labels[labels != bg_label]
        out[bg] = bg_color
    for label in labels:
        mask = (label_field == label).nonzero()
        color = image[mask].mean(axis=0)
        out[mask] = color
    return out 
Example 46
Project: soccerstan   Author: Torvaney   File: soccerstan.py    (license)
def stan_map(vector):
    """ Create a map of vector items : id. """
    unique_items = np.unique(vector)
    return {item: id_ for id_, item in enumerate(unique_items, start=1)} 
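
A quick usage check with hypothetical team names (Stan indexes from 1, hence start=1):

import numpy as np

teams = np.array(['Arsenal', 'Burnley', 'Arsenal', 'Chelsea'])
print(stan_map(teams))  # {'Arsenal': 1, 'Burnley': 2, 'Chelsea': 3}
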
Example 47
Project: wmd-relax   Author: src-d   File: __init__.py    (license)
def _common_vocabulary_batch(self, words1, weights1, i2):
        words2, weights2 = self._get_vocabulary(i2)
        joint, index = numpy.unique(numpy.concatenate((words1, words2)),
                                    return_index=True)
        nw1 = numpy.zeros(len(joint), dtype=numpy.float32)
        cmp = index < len(words1)
        nw1[numpy.nonzero(cmp)] = weights1[index[cmp]]
        nw2 = numpy.zeros(len(joint), dtype=numpy.float32)
        nw2[numpy.searchsorted(joint, words2)] = weights2
        return joint, nw1, nw2