Java Code Examples for com.google.common.collect.Multiset#size()

The following examples show how to use com.google.common.collect.Multiset#size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBucketBalancer.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Runs one balancing pass and verifies the number of moves it reports, the
 * resulting per-node bucket counts, and that a second pass is a no-op.
 *
 * @param balancer the balancer under test
 * @param expectedMoves the number of moves the first {@code balance()} call must report
 */
private static void assertBalancing(BucketBalancer balancer, int expectedMoves)
{
    int actualMoves = balancer.balance();
    assertEquals(actualMoves, expectedMoves);

    // check that number of buckets per node is within bounds
    ClusterState clusterState = balancer.fetchClusterState();
    for (Distribution distribution : clusterState.getDistributionAssignments().keySet()) {
        Multiset<String> allocationCounts = HashMultiset.create();
        clusterState.getDistributionAssignments().get(distribution).stream()
                .map(BucketAssignment::getNodeIdentifier)
                .forEach(allocationCounts::add);

        // size() is the total number of assignments (occurrences), not the number of distinct nodes
        double bucketsPerNode = (1.0 * allocationCounts.size()) / clusterState.getActiveNodes().size();
        int minBucketsPerNode = (int) Math.floor(bucketsPerNode);
        int maxBucketsPerNode = (int) Math.ceil(bucketsPerNode);

        // Iterate distinct nodes only; iterating the multiset itself would repeat
        // the same assertions once per bucket assigned to the node.
        // NOTE(review): only nodes that hold at least one bucket are checked here;
        // an active node with zero buckets is not compared against the lower bound.
        for (String node : allocationCounts.elementSet()) {
            int bucketsOnNode = allocationCounts.count(node);
            assertGreaterThanOrEqual(bucketsOnNode, minBucketsPerNode, node + " has fewer buckets than expected");
            assertLessThanOrEqual(bucketsOnNode, maxBucketsPerNode, node + " has more buckets than expected");
        }
    }

    // check stability
    assertEquals(balancer.balance(), 0);
}
 
Example 2
Source File: EntityScorer.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a scorer context from a list of words.
 *
 * <p>Words are de-duplicated through a sorted multiset. For every distinct word
 * that has an embedding, its vector is appended to a flat buffer and its
 * occurrence count is recorded. Words without an embedding are skipped.
 *
 * @param words the words to build the context from; duplicates are counted
 * @return the context created from the packed vectors and the per-word counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> counter = TreeMultiset.create();
    counter.addAll(words);

    // Read the embedding dimension once; it is used for sizing and for every copy below.
    final int word_dim = kb.getEmbeddingsSize();
    // word_vecs is the concatenation of all word vectors of the word list
    // NOTE(review): counter.size() counts every occurrence, while the loop below
    // writes one vector per distinct word, so the buffer can be larger than the
    // filled prefix. Confirm whether create_context depends on this length
    // before shrinking it.
    float[] word_vecs = new float[counter.size() * word_dim];
    IntArrayList word_counts = new IntArrayList();
    int n_words = 0;

    for (Multiset.Entry<String> entry : counter.entrySet()) {
        short[] vector = kb.getWordEmbeddings(entry.getElement());
        if (vector == null) {
            // no embedding known for this word
            continue;
        }
        word_counts.add(entry.getCount());

        // Start of this word's slot in the flat buffer.
        final int offset = n_words * word_dim;
        for (int i = 0; i < word_dim; i++) {
            word_vecs[offset + i] = vector[i];
        }
        n_words += 1;
    }
    // presumably shrinks the backing array so elements() has one slot per kept word — verify
    word_counts.trim();

    return create_context(word_vecs, word_counts.elements());
}
 
Example 3
Source File: TagDict.java    From EasySRL with Apache License 2.0 6 votes vote down vote up
/**
 * Stores under {@code word} the categories whose count is at least one
 * thousandth (integer floor) of the total count, ordered by the shared
 * {@code comparator}.
 *
 * @param countForCategory occurrence counts per category for this word
 * @param result map that receives the word's category list
 * @param word the word being recorded
 */
private static void addEntryForWord(final Multiset<Category> countForCategory,
		final Map<String, Collection<Category>> result, final String word) {
	// Copy the entries so they can be ordered independently of the multiset.
	final List<Entry<Category>> orderedEntries = new ArrayList<>(countForCategory.entrySet());
	orderedEntries.sort(comparator);

	// size() is the total number of occurrences across all categories.
	final int threshold = Math.floorDiv(countForCategory.size(), 1000);

	final List<Category> keptCategories = new ArrayList<>();
	for (final Entry<Category> candidate : orderedEntries) {
		if (candidate.getCount() < threshold) {
			continue;
		}
		keptCategories.add(candidate.getElement());
	}

	result.put(word, keptCategories);
}
 
Example 4
Source File: NeuralNetworkTrainer.java    From Word2VecJava with MIT License 6 votes vote down vote up
/**
 * Creates a trainer, copies the relevant settings out of {@code config},
 * allocates the weight matrices and the lookup table, and runs the two
 * initialisation helpers.
 *
 * @param config source of the layer size, window size and initial learning rate
 * @param vocab multiset of tokens; its total occurrence count becomes {@code numTrainedTokens}
 * @param huffmanNodes per-token Huffman nodes; the number of entries becomes {@code vocabSize}
 * @param listener progress listener stored for later use
 */
NeuralNetworkTrainer(NeuralNetworkConfig config, Multiset<String> vocab, Map<String, HuffmanNode> huffmanNodes, TrainingProgressListener listener) {
	this.config = config;
	this.huffmanNodes = huffmanNodes;
	this.listener = listener;
	// Number of map entries, i.e. distinct tokens that have a Huffman node.
	this.vocabSize = huffmanNodes.size();
	// Multiset#size() counts every occurrence, so this is a token total, not a distinct-word count.
	this.numTrainedTokens = vocab.size();
	this.layer1_size = config.layerSize;
	this.window = config.windowSize;
	
	this.actualWordCount = new AtomicInteger();
	this.alpha = config.initialLearningRate;
	
	// All three matrices are vocabSize x layer1_size; they must be allocated
	// after vocabSize and layer1_size have been assigned above.
	this.syn0 = new double[vocabSize][layer1_size];
	this.syn1 = new double[vocabSize][layer1_size];
	this.syn1neg = new double[vocabSize][layer1_size];
	this.table = new int[TABLE_SIZE];
	
	// presumably these fill syn0 and table respectively — bodies are not visible here
	initializeSyn0();
	initializeUnigramTable();
}
 
Example 5
Source File: SingleSentencePartialCreditTestingStatistics.java    From spf with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Compares the predicate-constant pairs of a gold expression with those of a
 * label expression.
 *
 * @param gold the reference expression
 * @param label the expression to score; {@code null} is treated as having no pairs
 * @return a triplet of (gold pair count, label pair count, matching pair count)
 */
private static PartialCreditTriplet partialCompare(LogicalExpression gold,
		LogicalExpression label) {
	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> goldPairs = GetPredConstPairs
			.of(gold);

	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> labelPairs;
	if (label != null) {
		labelPairs = GetPredConstPairs.of(label);
	} else {
		// A missing label contributes no pairs.
		labelPairs = HashMultiset.create();
	}

	// Multiset intersection: each gold pair is kept with the smaller of its
	// gold count and its label count, so the total size is the number of
	// matches.
	final Multiset<Pair<? extends LogicalExpression, ? extends LogicalExpression>> matches = HashMultiset
			.create();
	for (final Entry<Pair<? extends LogicalExpression, ? extends LogicalExpression>> goldEntry : goldPairs
			.entrySet()) {
		final Pair<? extends LogicalExpression, ? extends LogicalExpression> pair = goldEntry
				.getElement();
		final int sharedCount = Math.min(goldEntry.getCount(),
				labelPairs.count(pair));
		intersectionSetCount:
		matches.setCount(pair, sharedCount);
	}

	final int goldTotal = goldPairs.size();
	final int labelTotal = labelPairs.size();
	final int matchTotal = matches.size();
	return new PartialCreditTriplet(goldTotal, labelTotal, matchTotal);
}
 
Example 6
Source File: CutoffsDictionary.java    From EasySRL with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether {@code label} is frequent enough for the given category
 * and argument slot.
 *
 * @return {@code true} for {@code SRLFrame.NONE}; otherwise {@code true} only
 *         if counts exist for the slot, the slot's total count is at least
 *         {@code minSlotSRL}, and the label's own count is at least
 *         {@code minSlotRole}
 */
@Override
public boolean isFrequent(final Category category, final int argumentNumber, final SRLLabel label) {
	// The "no role" label is always accepted.
	if (label == SRLFrame.NONE) {
		return true;
	}

	final Multiset<SRLLabel> labelCounts = categoryToArgumentToSRLs.get(category.withoutAnnotation(),
			argumentNumber);
	if (labelCounts == null) {
		// Nothing was recorded for this category/argument slot.
		return false;
	}

	final boolean slotSeenOftenEnough = labelCounts.size() >= minSlotSRL;
	return slotSeenOftenEnough && labelCounts.count(label) >= minSlotRole;
}
 
Example 7
Source File: DiscreteElementwiseConditionalDistribution.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the relative frequency of {@code element} among everything recorded
 * for {@code given}, or 1 when {@code given} has no entry in the table.
 */
@Override
public double getMLProbability(final A element, final B given) {
	if (!table.containsKey(given)) {
		return 1;
	}

	final Multiset<A> observed = table.get(given);
	// NOTE(review): an empty multiset gives 0.0 / 0, i.e. NaN — confirm that
	// empty entries can never be stored in the table.
	final double occurrences = observed.count(element);
	return occurrences / observed.size();
}
 
Example 8
Source File: DiscreteElementwiseConditionalDistribution.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Maximum-likelihood estimate of {@code element} conditioned on
 * {@code given}: the element's count divided by the total count recorded for
 * that condition. A condition with no table entry yields 1.
 */
@Override
public double getMLProbability(final A element, final B given) {
	final boolean conditionKnown = table.containsKey(given);
	if (!conditionKnown) {
		return 1;
	}

	final Multiset<A> outcomes = table.get(given);
	final int matching = outcomes.count(element);
	final int total = outcomes.size();
	// Cast before dividing so the division is done in floating point.
	return ((double) matching) / total;
}
 
Example 9
Source File: MultisetExpression.java    From gef with Eclipse Public License 2.0 4 votes vote down vote up
/**
 * Returns the size of the multiset currently held by this expression, or the
 * size of {@code EMPTY_MULTISET} when no multiset is held.
 */
@Override
public int size() {
	final Multiset<E> current = get();
	if (current == null) {
		return EMPTY_MULTISET.size();
	}
	return current.size();
}
 
Example 10
Source File: FilterAlignmentArtifacts.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Builds consensus sequences ("unitigs") from consecutive non-empty pileups.
 *
 * <p>A new unitig starts whenever a pileup's start position is not exactly one
 * past the previously covered position. At each locus the consensus base is
 * appended unless deletions make up at least half of the pileup (integer
 * division). If inserted sequences follow more than half of the pileup's
 * elements, the most frequent insertion is appended as well.
 *
 * @param libs iterator supplying the pileups, consumed to exhaustion
 * @return the unitigs longer than {@code MIN_UNITIG_LENGTH}, as byte arrays
 */
private List<byte[]> getUnitigs(final LocusIteratorByState libs) {
    final List<StringBuilder> unitigBuilders = new ArrayList<>();
    int lastCoveredLocus = Integer.MIN_VALUE;
    while (libs.hasNext()) {
        final ReadPileup pileup = libs.next().getBasePileup();
        if (pileup.isEmpty()) {
            continue;
        }

        // begin new unitig if this pileup isn't contiguous with the last
        final int currentLocus = pileup.getLocation().getStart();
        if (currentLocus != lastCoveredLocus + 1) {
            unitigBuilders.add(new StringBuilder());
        }
        lastCoveredLocus = currentLocus;
        final StringBuilder currentUnitigBuilder = unitigBuilders.get(unitigBuilders.size() - 1);

        // add no bases (deletion) or consensus bases.
        final int[] baseCounts = pileup.getBaseCounts();
        final int deletionCount = (int) Utils.stream(pileup).filter(PileupElement::isDeletion).count();
        if (deletionCount < pileup.size() / 2) {
            final byte consensusBase = BaseUtils.baseIndexToSimpleBase(MathUtils.maxElementIndex(baseCounts));
            currentUnitigBuilder.append((char) consensusBase);

            // in addition to consensus base, add inserted bases if needed
            final Multiset<String> insertedBases = Utils.stream(pileup)
                    .map(PileupElement::getBasesOfImmediatelyFollowingInsertion)
                    .filter(s -> s != null)
                    .collect(Collectors.toCollection(HashMultiset::create));

            // size() counts every insertion occurrence, so this asks whether
            // more than half of the pileup is followed by an insertion.
            // The multiset is non-empty whenever this holds, so next() is safe.
            if (insertedBases.size() > pileup.size() / 2) {
                final String consensusInsertion = Multisets.copyHighestCountFirst(insertedBases).entrySet().iterator().next().getElement();
                currentUnitigBuilder.append(consensusInsertion);
            }
        }
    }

    // Encode with an explicit charset so the result does not depend on the
    // platform default; fully qualified to avoid needing a new import.
    return unitigBuilders.stream()
            .map(builder -> builder.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8))
            .filter(unitig -> unitig.length > MIN_UNITIG_LENGTH)
            .collect(Collectors.toList());
}
 
Example 11
Source File: SchemaSamplerTest.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that the observed relative frequency of {@code s} in {@code counts}
 * is statistically consistent with the expected probability {@code p}.
 *
 * <p>The tolerance is five standard errors of a sample proportion,
 * {@code sqrt(p * (1 - p) / n)}. The previous tolerance,
 * {@code sqrt(n * p * (n - p))}, mixed a count with a probability and grew
 * with {@code n}, so the assertion could never fail.
 *
 * @param counts the sampled values; its total occurrence count is the sample size
 * @param p the expected probability of {@code s}, in [0, 1]
 * @param s the value whose frequency is checked
 */
private void check(Multiset<String> counts, double p, String s) {
    // Multiset#size() is the total number of samples drawn, not the number of distinct values.
    double n = counts.size();
    double standardError = Math.sqrt(p * (1 - p) / n);
    // Five standard errors keeps the chance of a spurious failure negligible.
    assertEquals(p, counts.count(s) / n, 5 * standardError);
}