Python scipy.stats.itemfreq() Examples

The following are 13 code examples of scipy.stats.itemfreq(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats , or try the search function

Example #1

Source File: utility_functions.py From MaskTrack with MIT License

6 votes

def cross_entropy_loss_weighted(output, labels):

    temp = labels.data.cpu().numpy()
    freqCount = scipystats.itemfreq(temp)
    total = freqCount[0][1]+freqCount[1][1]
    perc_1 = freqCount[1][1]/total
    perc_0 = freqCount[0][1]/total

    weight_array = [perc_1, perc_0]

    if torch.cuda.is_available():
        weight_tensor = torch.FloatTensor(weight_array).cuda()
    else:
        weight_tensor = torch.FloatTensor(weight_array)

    ce_loss = nn.CrossEntropyLoss(weight=weight_tensor)
    images, channels, height, width = output.data.shape
    loss = ce_loss(output, labels.long().view(images, height, width))
    return loss

Example #2

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_numeric_types(self):
        # Check itemfreq works for all dtypes (adapted from np.unique tests)
        def _check_itemfreq(dt):
            a = np.array(self.a, dt)
            with suppress_warnings() as sup:
                sup.filter(DeprecationWarning)
                v = stats.itemfreq(a)
            assert_array_equal(v[:, 0], [1, 2, 5, 7])
            assert_array_equal(v[:, 1], np.array([20, 10, 20, 20], dtype=dt))

        dtypes = [np.int32, np.int64, np.float32, np.float64,
                  np.complex64, np.complex128]
        for dt in dtypes:
            _check_itemfreq(dt)

Example #3

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_object_arrays(self):
        a, b = self.a, self.b
        dt = 'O'
        aa = np.empty(len(a), dt)
        aa[:] = a
        bb = np.empty(len(b), dt)
        bb[:] = b
        with suppress_warnings() as sup:
            sup.filter(DeprecationWarning)
            v = stats.itemfreq(aa)
        assert_array_equal(v[:, 0], bb)

Example #4

Source File: test_stats.py From GraphicDesignPatternByPython with MIT License

5 votes

def test_structured_arrays(self):
        a, b = self.a, self.b
        dt = [('', 'i'), ('', 'i')]
        aa = np.array(list(zip(a, a)), dt)
        bb = np.array(list(zip(b, b)), dt)
        with suppress_warnings() as sup:
            sup.filter(DeprecationWarning)
            v = stats.itemfreq(aa)
        # Arrays don't compare equal because v[:,0] is object array
        assert_equal(tuple(v[2, 0]), tuple(bb[2]))

Example #5

Source File: utils.py From MagnetLoss-PyTorch with MIT License

5 votes

def unsupervised_clustering_accuracy(emb, labels):
    k = np.unique(labels).size
    kmeans = KMeans(n_clusters=k, max_iter=35, n_init=15, n_jobs=-1).fit(emb)
    emb_labels = kmeans.labels_
    G = np.zeros((k,k))
    for i in range(k):
        lbl = labels[emb_labels == i]
        uc = itemfreq(lbl)
        for uu, cc in uc:
            G[i,uu] = -cc
    A = linear_assignment_.linear_assignment(G)
    acc = 0.0
    for (cluster, best) in A:
        acc -= G[cluster,best]
    return acc / float(len(labels))

Example #6

Source File: postprocessing.py From open-solution-data-science-bowl-2018 with MIT License

5 votes

def mean_blob_size(mask):
    labels, labels_nr = ndi.label(mask)
    if labels_nr < 2:
        mean_area = 1
        mean_radius = 1
    else:
        mean_area = int(itemfreq(labels)[1:, 1].mean())
        mean_radius = int(np.round(np.sqrt(mean_area / np.pi)))
    return mean_area, mean_radius

Example #7

Source File: processing.py From MDI with MIT License

5 votes

def compute_histogram(data, labels):
    histogram = dict(itemfreq(data))
    for label in labels:
        if label not in histogram:
            histogram[label] = .0
    return histogram

Example #8

Source File: example_adult_mcar.py From MDI with MIT License

5 votes

def compute_histogram(data, labels):
    histogram = dict(itemfreq(data))
    for label in labels:
        if label not in histogram:
            histogram[label] = .0
    return histogram

Example #9

Source File: example_adult.py From MDI with MIT License

5 votes

def compute_histogram(data, labels):
    histogram = itemfreq(sorted(data))
    for label in labels:
        if label not in histogram[:,0]:
            histogram = np.vstack((histogram,
                                   np.array([[label, 0]], dtype=object)))
    histogram = histogram[histogram[:,0].argsort()]
    return histogram

# compute histograms

Example #10

Source File: example_votes.py From MDI with MIT License

5 votes

def compute_histogram(data, labels):
    histogram = itemfreq(sorted(data))
    for label in labels:
        if label not in histogram[:,0]:
            histogram = np.vstack((histogram,
                                   np.array([[label, 0]], dtype=object)))
    histogram = histogram[histogram[:,0].argsort()]
    return histogram

# compute histograms

Example #11

Source File: term_similarity.py From text-analytics-with-python with Apache License 2.0

5 votes

def boc_term_vectors(word_list):
    word_list = [word.lower() for word in word_list]
    unique_chars = np.unique(
                        np.hstack([list(word) 
                        for word in word_list]))
    word_list_term_counts = [{char: count for char, count in itemfreq(list(word))}
                             for word in word_list]
    
    boc_vectors = [np.array([int(word_term_counts.get(char, 0)) 
                            for char in unique_chars])
                   for word_term_counts in word_list_term_counts]
    return list(unique_chars), boc_vectors

Example #12

Source File: tree.py From MLAlgorithms with MIT License

5 votes

def _calculate_leaf_value(self, targets):
        """Find optimal value for leaf."""
        if self.loss is not None:
            # Gradient boosting
            self.outcome = self.loss.approximate(targets["actual"], targets["y_pred"])
        else:
            # Random Forest
            if self.regression:
                # Mean value for regression task
                self.outcome = np.mean(targets["y"])
            else:
                # Probability for classification task
                self.outcome = stats.itemfreq(targets["y"])[:, 1] / float(targets["y"].shape[0])

Example #13

Source File: tests.py From barrista with MIT License

4 votes

def test_ResizingMonitor_random_scale(self):
        """Test the resizing monitor random scale capability."""
        import barrista.design as design
        import numpy as np
        from barrista.monitoring import CyclingDataMonitor, ResizingMonitor
        import barrista.monitoring as bm
        if bm._cv2 is None:
            # OpenCV is not available, so skip the test.
            return

        netspec = design.NetSpecification([[1, 3, 5, 5], [1, 1, 5, 5]],
                                          inputs=['a', 'b'],
                                          phase=design.Phase.TRAIN)
        net = netspec.instantiate()

        dmon = CyclingDataMonitor(
            only_preload=['a', 'b'],
            X={'a': [np.zeros((3, 6, 6))],
               'b': [np.ones((1, 6, 6))]})
        tmon = ResizingMonitor(
            blobinfos={'a': 1, 'b': 2},
            base_scale=2.,
            random_change_up_to=0.5,
            net_input_size_adjustment_multiple_of=1,
            interp_methods={'a':'c', 'b':'n'}
        )
        kwargs = {'net': net,
                  'testnet': net,
                  'callback_signal': 'initialize_train'}
        tmon._initialize_train(kwargs)
        dmon._initialize_train(kwargs)

        dmon._pre_fit({'net': net, 'callback_signal': 'pre_fit'})
        tmon._pre_fit({'net': net, 'callback_signal': 'pre_fit'})
        kwargs = {'net': net, 'testnet': net}
        scales = []
        np.random.seed(1)
        for _ in range(1000):
            dmon._pre_train_batch(kwargs)
            tmon._pre_train_batch(kwargs)
            scales.append(net.blobs['a'].data.shape[2])
        from scipy.stats import chisquare, itemfreq
        freq = itemfreq(scales)[:, 1]
        _, pvalue = chisquare(freq)
        self.assertTrue(pvalue > 0.1)