Java Code Examples for com.google.common.collect.Multiset#size()

The following examples show how to use com.google.common.collect.Multiset#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.

Example 1

Source File: TestBucketBalancer.java From presto with Apache License 2.0

6 votes

/**
 * Runs one balancing pass and verifies both the reported move count and the resulting layout.
 *
 * <p>For every distribution, each node that holds at least one bucket must hold between
 * {@code floor(avg)} and {@code ceil(avg)} buckets, where {@code avg} is the distribution's total
 * bucket count divided by the number of active nodes. A second balancing pass must then report
 * zero moves.
 *
 * @param balancer the balancer under test
 * @param expectedMoves the value the first {@code balance()} call is expected to return
 */
private static void assertBalancing(BucketBalancer balancer, int expectedMoves)
{
    int actualMoves = balancer.balance();
    assertEquals(actualMoves, expectedMoves);

    // check that number of buckets per node is within bounds
    ClusterState clusterState = balancer.fetchClusterState();
    for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
        Multiset<String> allocationCounts = HashMultiset.create();
        clusterState.getDistributionAssignments().get(distribution).stream()
                .map(BucketAssignment::getNodeIdentifier)
                .forEach(allocationCounts::add);

        // Multiset.size() counts every occurrence, i.e. the total number of buckets of this distribution
        double bucketsPerNode = (1.0 * allocationCounts.size()) / clusterState.getActiveNodes().size();
        int minBuckets = (int) Math.floor(bucketsPerNode);
        int maxBuckets = (int) Math.ceil(bucketsPerNode);

        // Iterate entries (one per distinct node) rather than the multiset itself, which would
        // repeat the same two assertions once per bucket assigned to the node.
        for (Multiset.Entry<String> entry : allocationCounts.entrySet()) {
            String node = entry.getElement();
            assertGreaterThanOrEqual(entry.getCount(), minBuckets, node + " has fewer buckets than expected");
            assertLessThanOrEqual(entry.getCount(), maxBuckets, node + " has more buckets than expected");
        }
    }

    // check stability
    assertEquals(balancer.balance(), 0);
}

Example 2

Source File: EntityScorer.java From entity-fishing with Apache License 2.0

6 votes

/**
 * Builds a scorer context from a list of words.
 *
 * <p>The words are collapsed into a sorted multiset. For every distinct word that has an
 * embedding in the knowledge base, its vector is appended to one flat array and its occurrence
 * count is recorded. Words without an embedding are skipped.
 *
 * @param words the words to build the context from
 * @return the context created from the concatenated vectors and the matching counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> wordBag = TreeMultiset.create();
    wordBag.addAll(words);

    int word_dim = kb.getEmbeddingsSize();
    // Flat buffer holding the word vectors back to back; it is sized by the total number of
    // occurrences, so it may be longer than the part that actually gets filled.
    float[] flatVectors = new float[wordBag.size() * word_dim];
    IntArrayList occurrences = new IntArrayList();
    int filled = 0;

    for (Multiset.Entry<String> wordEntry : wordBag.entrySet()) {
        short[] embedding = kb.getWordEmbeddings(wordEntry.getElement());
        if (embedding == null) {
            // no embedding known for this word
            continue;
        }

        occurrences.add(wordEntry.getCount());
        int offset = filled * word_dim;
        for (int i = 0; i < kb.getEmbeddingsSize(); i++) {
            flatVectors[offset + i] = embedding[i];
        }
        filled++;
    }
    occurrences.trim();

    return create_context(flatVectors, occurrences.elements());
}

Example 3

Source File: TagDict.java From EasySRL with Apache License 2.0

6 votes

/**
 * Stores, for one word, the categories that were observed often enough.
 *
 * <p>The categories are ordered with the shared {@code comparator}, and only those whose count
 * reaches one thousandth (rounded down) of the total number of observations are kept.
 *
 * @param countForCategory observed categories for the word, with their counts
 * @param result map that receives the filtered category list under {@code word}
 * @param word the word the categories belong to
 */
private static void addEntryForWord(final Multiset<Category> countForCategory,
		final Map<String, Collection<Category>> result, final String word) {
	// Snapshot the entries so they can be sorted independently of the multiset.
	final List<Entry<Category>> ranked = new ArrayList<>(countForCategory.entrySet());

	// size() is the total number of occurrences, not the number of distinct categories.
	final int observations = countForCategory.size();
	final int threshold = Math.floorDiv(observations, 1000);
	Collections.sort(ranked, comparator);

	final List<Category> frequent = new ArrayList<>();
	for (final Entry<Category> candidate : ranked) {
		if (candidate.getCount() < threshold) {
			continue;
		}
		frequent.add(candidate.getElement());
	}

	result.put(word, frequent);
}

Example 4

Source File: NeuralNetworkTrainer.java From Word2VecJava with MIT License

6 votes

/**
 * Creates a trainer: copies the configuration into fields, allocates the weight matrices and the
 * lookup table, and then runs the two initialization helpers.
 *
 * @param config supplies the layer size, window size and initial learning rate
 * @param vocab multiset of vocabulary words; its total size is stored as the trained-token count
 * @param huffmanNodes map from word to Huffman node; its size is stored as the vocabulary size
 * @param listener progress listener stored for later use
 */
NeuralNetworkTrainer(NeuralNetworkConfig config, Multiset<String> vocab, Map<String, HuffmanNode> huffmanNodes, TrainingProgressListener listener) {
	this.config = config;
	this.huffmanNodes = huffmanNodes;
	this.listener = listener;
	// Number of distinct entries in the Huffman node map
	this.vocabSize = huffmanNodes.size();
	// Multiset.size() counts every occurrence, so this is a total, not a distinct-word count
	this.numTrainedTokens = vocab.size();
	this.layer1_size = config.layerSize;
	this.window = config.windowSize;
	
	this.actualWordCount = new AtomicInteger();
	this.alpha = config.initialLearningRate;
	
	// One row per vocabulary entry, one column per layer unit
	this.syn0 = new double[vocabSize][layer1_size];
	this.syn1 = new double[vocabSize][layer1_size];
	this.syn1neg = new double[vocabSize][layer1_size];
	this.table = new int[TABLE_SIZE];
	
	// Both helpers run last, after all fields above have been assigned
	initializeSyn0();
	initializeUnigramTable();
}

Example 5

Source File: SingleSentencePartialCreditTestingStatistics.java From spf with GNU General Public License v2.0

5 votes

/**
 * Compares the predicate-constant pairs of a gold expression with those of a label.
 *
 * <p>A {@code null} label is treated as having no pairs. The number of matches is the size of
 * the multiset intersection: for each gold pair, the smaller of its count in the gold multiset
 * and its count in the label multiset.
 *
 * @param gold the gold-standard expression
 * @param label the expression to score, may be {@code null}
 * @return a triplet built from the gold pair count, the label pair count and the match count
 */
private static PartialCreditTriplet partialCompare(LogicalExpression gold,
		LogicalExpression label) {
	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> goldPairs = GetPredConstPairs
			.of(gold);

	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> labelPairs;
	if (label != null) {
		labelPairs = GetPredConstPairs.of(label);
	} else {
		// No label: compare against an empty multiset.
		labelPairs = HashMultiset.create();
	}

	// Pairs present on both sides, each with the smaller of its two counts.
	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> matched = HashMultiset
			.create();
	for (final Entry<Pair<? extends LogicalExpression, ? extends LogicalExpression>> goldEntry : goldPairs
			.entrySet()) {
		final Pair<? extends LogicalExpression, ? extends LogicalExpression> pair = goldEntry
				.getElement();
		final int shared = Math.min(goldEntry.getCount(), labelPairs.count(pair));
		matched.setCount(pair, shared);
	}

	final int goldTotal = goldPairs.size();
	final int labelTotal = labelPairs.size();
	final int matchedTotal = matched.size();
	return new PartialCreditTriplet(goldTotal, labelTotal, matchedTotal);
}

Example 6

Source File: CutoffsDictionary.java From EasySRL with Apache License 2.0

5 votes

/**
 * Tells whether a label is frequent for the given category and argument slot.
 *
 * <p>{@code SRLFrame.NONE} always counts as frequent. Any other label is frequent only when
 * counts exist for the slot, the slot has at least {@code minSlotSRL} observations in total, and
 * the label itself was observed at least {@code minSlotRole} times.
 */
@Override
public boolean isFrequent(final Category category, final int argumentNumber, final SRLLabel label) {
	if (label == SRLFrame.NONE) {
		return true;
	}

	final Multiset<SRLLabel> labelCounts = categoryToArgumentToSRLs.get(category.withoutAnnotation(),
			argumentNumber);
	if (labelCounts == null) {
		// nothing recorded for this category/argument combination
		return false;
	}

	final boolean slotSeenEnough = labelCounts.size() >= minSlotSRL;
	return slotSeenEnough && labelCounts.count(label) >= minSlotRole;
}

Example 7

Source File: DiscreteElementwiseConditionalDistribution.java From api-mining with GNU General Public License v3.0

5 votes

/**
 * Returns the maximum-likelihood estimate of {@code element} given {@code given}: the count of
 * {@code element} divided by the total number of observations recorded for {@code given}.
 * Returns 1 when nothing is recorded for {@code given}.
 */
@Override
public double getMLProbability(final A element, final B given) {
	if (!table.containsKey(given)) {
		return 1;
	}

	final Multiset<A> observed = table.get(given);
	final double hits = observed.count(element);
	// size() is the total number of occurrences across all elements
	return hits / observed.size();
}

Example 8

Source File: DiscreteElementwiseConditionalDistribution.java From tassal with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Computes the relative frequency of {@code element} among everything observed together with
 * {@code given}. When {@code given} has no entry in the table the result is 1.
 */
@Override
public double getMLProbability(final A element, final B given) {
	final boolean known = table.containsKey(given);
	if (!known) {
		return 1;
	}

	final Multiset<A> outcomes = table.get(given);
	final double occurrences = outcomes.count(element);
	final int total = outcomes.size();
	return occurrences / total;
}

Example 9

Source File: MultisetExpression.java From gef with Eclipse Public License 2.0

4 votes

/**
 * Returns the size of the multiset obtained from {@code get()}, or the size of
 * {@code EMPTY_MULTISET} when {@code get()} returns {@code null}.
 */
@Override
public int size() {
	final Multiset<E> current = get();
	if (current == null) {
		return EMPTY_MULTISET.size();
	}
	return current.size();
}

Example 10

Source File: FilterAlignmentArtifacts.java From gatk with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Builds consensus unitigs from the pileups produced by a locus iterator.
 *
 * <p>Empty pileups are skipped. A new unitig is started whenever a pileup's start position does
 * not directly follow the previously covered position. At each position the consensus base is
 * appended unless deletions reach half of the pileup (integer division); a consensus insertion
 * is appended as well when more than half of the pileup carries an insertion after that base.
 *
 * @param libs iterator over the loci to assemble
 * @return the unitigs longer than {@code MIN_UNITIG_LENGTH}, encoded as bytes
 */
private List<byte[]> getUnitigs(final LocusIteratorByState libs) {
    final List<StringBuilder> unitigBuilders = new ArrayList<>();
    int lastCoveredLocus = Integer.MIN_VALUE;
    while (libs.hasNext()) {
        final ReadPileup pileup = libs.next().getBasePileup();
        if (pileup.isEmpty()) {
            continue;
        }

        // begin new unitig if this pileup isn't contiguous with the last
        final int currentLocus = pileup.getLocation().getStart();
        if (currentLocus != lastCoveredLocus + 1) {
            unitigBuilders.add(new StringBuilder());
        }
        lastCoveredLocus = currentLocus;
        final StringBuilder currentUnitigBuilder = unitigBuilders.get(unitigBuilders.size() - 1);

        // Both majority checks below use the same integer-division threshold.
        final int halfPileupSize = pileup.size() / 2;

        // add no bases (deletion) or consensus bases.
        final int[] baseCounts = pileup.getBaseCounts();
        final int deletionCount = (int) Utils.stream(pileup).filter(PileupElement::isDeletion).count();
        if (deletionCount < halfPileupSize) {
            final byte consensusBase = BaseUtils.baseIndexToSimpleBase(MathUtils.maxElementIndex(baseCounts));
            currentUnitigBuilder.append((char) consensusBase);

            // in addition to consensus base, add inserted bases if needed
            final Multiset<String> insertedBases = Utils.stream(pileup)
                    .map(PileupElement::getBasesOfImmediatelyFollowingInsertion)
                    .filter(s -> s != null)
                    .collect(Collectors.toCollection(HashMultiset::create));

            // size() counts every read that carries an insertion, not the distinct insertion strings
            if (insertedBases.size() > halfPileupSize) {
                final String consensusInsertion = Multisets.copyHighestCountFirst(insertedBases).entrySet().iterator().next().getElement();
                currentUnitigBuilder.append(consensusInsertion);
            }
        }
    }

    // Encode with an explicit charset so the bytes do not depend on the platform default encoding.
    return unitigBuilders.stream()
            .map(builder -> builder.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8))
            .filter(unitig -> unitig.length > MIN_UNITIG_LENGTH)
            .collect(Collectors.toList());
}

Example 11

Source File: SchemaSamplerTest.java From log-synth with Apache License 2.0

4 votes

/**
 * Asserts that the observed relative frequency of {@code s} in {@code counts} is close to the
 * expected probability {@code p}.
 *
 * <p>The allowed deviation is five standard errors of a binomial proportion,
 * {@code sqrt(p * (1 - p) / n)}, where {@code n} is the total number of samples. The previous
 * tolerance, {@code sqrt(n * p * (n - p))}, grew with {@code n} and was far larger than any
 * possible difference between two proportions, so the assertion could not fail.
 *
 * @param counts sampled values with their occurrence counts
 * @param p expected probability of {@code s}, in [0, 1]
 * @param s the value whose frequency is checked
 */
private void check(Multiset<String> counts, double p, String s) {
    // size() is the total number of samples (every occurrence counted)
    double n = counts.size();
    // Five standard errors keeps spurious failures negligible while still detecting real skew.
    double tolerance = 5 * Math.sqrt(p * (1 - p) / n);
    assertEquals(p, counts.count(s) / n, tolerance);
}