Java Code Examples for com.google.common.collect.Multiset#entrySet()

The following examples show how to use com.google.common.collect.Multiset#entrySet(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: SpecValidations.java From deploymentmanager-autogen with Apache License 2.0

6 votes

/**
 * Verifies that no metadata key is declared more than once across the spec's
 * passwords and GCE metadata items.
 *
 * @param spec the deployment package spec whose metadata keys are checked
 * @throws IllegalArgumentException if any metadata key occurs more than once
 */
private static void validateMetadataKeyUniqueness(SingleVmDeploymentPackageSpec spec) {
  // Tally every metadata key, regardless of which kind of item declares it.
  Multiset<String> keyTally = HashMultiset.create();
  for (PasswordSpec passwordSpec : spec.getPasswordsList()) {
    keyTally.add(passwordSpec.getMetadataKey());
  }
  for (GceMetadataItem item : spec.getGceMetadataItemsList()) {
    keyTally.add(item.getKey());
  }

  // Fail on the first key that was tallied more than once.
  for (Multiset.Entry<String> tally : keyTally.entrySet()) {
    if (tally.getCount() <= 1) {
      continue;
    }
    String message = String.format("Metadata key '%s' is not unique", tally.getElement());
    throw new IllegalArgumentException(message);
  }
}

Example 2

Source File: AbstractIdentifierRenamings.java From naturalize with BSD 3-Clause "New" or "Revised" License

6 votes

/**
 * Builds one {@link Renaming} per alternative identifier name.
 *
 * For every alternative, each n-gram has its wildcard token substituted with
 * the alternative, is scored via {@code scoreNgram}, and contributes
 * {@code log2(score) * count} to a running sum. N-grams whose scoring throws
 * are logged and skipped.
 *
 * @param ngrams the n-grams (with occurrence counts) containing the wildcard token
 * @param alternatives the candidate identifier names
 * @param scope the scope passed on to {@code addScopePriors} and to each renaming
 * @return the renamings, in the natural order of the returned sorted set
 */
@Override
public SortedSet<Renaming> calculateScores(
		final Multiset<NGram<String>> ngrams,
		final Set<String> alternatives, final Scope scope) {
	final SortedSet<Renaming> renamings = Sets.newTreeSet();

	for (final String candidate : alternatives) {
		double logScoreSum = 0;
		for (final Entry<NGram<String>> ngramEntry : ngrams.entrySet()) {
			try {
				final NGram<String> substituted = NGram.substituteTokenWith(
						ngramEntry.getElement(), WILDCARD_TOKEN, candidate);
				logScoreSum += DoubleMath.log2(scoreNgram(substituted))
						* ngramEntry.getCount();
			} catch (final Throwable e) {
				// Best effort: a failing n-gram is reported and left out of the sum.
				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
			}
		}

		// Scope prior minus the accumulated log score, normalized by the
		// total number of n-gram occurrences.
		final double normalizedScore = (addScopePriors(candidate, scope) - logScoreSum)
				/ ngrams.size();
		renamings.add(new Renaming(candidate, normalizedScore,
				ngrams.size() / ngramLM.getN(), scope));
	}

	return renamings;
}

Example 3

Source File: CorpusAnalysis.java From tac-kbp-eal with MIT License

6 votes

/**
 * Renders the given counts as a bar chart with one bar per distinct symbol.
 *
 * @param counts the symbols and their counts; each entry becomes one bar
 * @param outFile the file handed to the renderer as output target
 * @param renderer the renderer that produces the chart
 * @param chartTitle the chart title
 * @param xAxisLabel the label of the x axis
 * @param yAxisLabel the label of the y axis
 * @throws IOException declared for the rendering call
 */
static void writeToChart(final Multiset<Symbol> counts, final File outFile,
    final GnuPlotRenderer renderer,
    final String chartTitle, final String xAxisLabel, final String yAxisLabel)
    throws IOException {

  final Axis xAxis = Axis.xAxis().setLabel(xAxisLabel).rotateLabels().build();
  final Axis yAxis = Axis.yAxis().setLabel(yAxisLabel).build();

  final BarChart.Builder chart = BarChart.builder()
      .setTitle(chartTitle)
      .setXAxis(xAxis)
      .setYAxis(yAxis)
      .hideKey();

  // One bar per distinct symbol: bar height is the count, label is the symbol's string form.
  for (final Multiset.Entry<Symbol> countEntry : counts.entrySet()) {
    final BarChart.Bar.Builder bar = BarChart.Bar.builder(countEntry.getCount());
    chart.addBar(bar.setLabel(countEntry.getElement().toString()).build());
  }

  renderer.renderTo(chart.build(), outFile);
}

Example 4

Source File: ValueTypeComposer.java From immutables with Apache License 2.0

6 votes

/**
 * Reports an error on the protoclass when two attributes of the type share a name.
 * Lazy attributes are compared only against other lazy attributes.
 *
 * @param type the value type whose attributes are inspected
 * @param protoclass the protoclass used to report the error
 */
private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
  if (type.attributes.isEmpty()) {
    return;
  }

  Multiset<String> nameCounts = HashMultiset.create(type.attributes.size());
  for (ValueAttribute attribute : type.attributes) {
    // The "$lazy" suffix keeps lazy attributes in their own comparison scope.
    String scopedName = attribute.isGenerateLazy
        ? attribute.name() + "$lazy"
        : attribute.name();
    nameCounts.add(scopedName);
  }

  List<String> duplicates = Lists.newArrayList();
  for (Multiset.Entry<String> nameCount : nameCounts.entrySet()) {
    if (nameCount.getCount() > 1) {
      // Strip the scope marker again before showing the name to the user.
      duplicates.add(nameCount.getElement().replace("$lazy", ""));
    }
  }

  if (duplicates.isEmpty()) {
    return;
  }
  protoclass.report()
      .error("Duplicate attribute names %s. You should check if correct @Value.Style applied",
          duplicates);
}

Example 5

Source File: TagDict.java From EasySRL with Apache License 2.0

6 votes

/**
 * Builds the word-to-categories dictionary from precomputed counts.
 *
 * Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own
 * entry; the category counts of all remaining words are pooled and registered
 * under {@code OTHER_WORDS}.
 *
 * @param wordCounts occurrence count per word
 * @param wordToCatToCount per-word category counts, looked up by each word in {@code wordCounts}
 * @return an immutable copy of the resulting dictionary
 */
private static Map<String, Collection<Category>> makeDict(final Multiset<String> wordCounts,
		final Map<String, Multiset<Category>> wordToCatToCount) {
	// Pooled category counts of every word that is not frequent enough.
	final Multiset<Category> rareWordCategoryCounts = HashMultiset.create();

	final Map<String, Collection<Category>> dict = new HashMap<>();
	for (final Entry<String> wordEntry : wordCounts.entrySet()) {
		final String word = wordEntry.getElement();
		final Multiset<Category> categoryCounts = wordToCatToCount.get(word);

		if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
			// Rare word: fold its statistics into the shared pool.
			for (final Entry<Category> categoryEntry : categoryCounts.entrySet()) {
				rareWordCategoryCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
			}
			continue;
		}

		// Frequent word: gets a dictionary entry of its own.
		addEntryForWord(categoryCounts, dict, word);
	}
	addEntryForWord(rareWordCategoryCounts, dict, OTHER_WORDS);

	return ImmutableMap.copyOf(dict);
}

Example 6

Source File: EntityScorer.java From entity-fishing with Apache License 2.0

6 votes

/**
 * Builds a scorer context from a list of words.
 *
 * The words are counted in a sorted multiset. For every distinct word that has
 * an embedding in {@code kb}, the embedding is appended to a flat float buffer
 * and the word's occurrence count is recorded. Words without an embedding are skipped.
 *
 * @param words the words to build the context from; repeated words are counted
 * @return the context created from the concatenated vectors and the per-word counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> wordTally = TreeMultiset.create();
    wordTally.addAll(words);

    int embeddingDim = kb.getEmbeddingsSize();
    // Flat buffer holding the word vectors back to back.
    // NOTE(review): sized by wordTally.size(), i.e. total occurrences, while only
    // distinct words with an embedding are written; the remaining tail stays zero.
    // Confirm that create_context expects this length.
    float[] concatenated = new float[wordTally.size() * embeddingDim];
    IntArrayList tallies = new IntArrayList();
    int kept = 0;

    for (Multiset.Entry<String> tally : wordTally.entrySet()) {
        short[] embedding = kb.getWordEmbeddings(tally.getElement());
        if (embedding == null) {
            continue;
        }
        tallies.add(tally.getCount());
        for (int i = 0; i < kb.getEmbeddingsSize(); i++) {
            concatenated[kept * embeddingDim + i] = embedding[i];
        }
        kept++;
    }
    tallies.trim();

    return create_context(concatenated, tallies.elements());
}

Example 7

Source File: NgramEnumerator.java From pyramid with Apache License 2.0

6 votes

/**
 * Gathers n-grams for the given document ids and keeps those whose
 * accumulated count reaches {@code minDF}.
 *
 * The term vector of each id is fetched from the index in a parallel stream
 * and passed to {@code add} together with a shared concurrent multiset.
 * Afterwards every entry whose count is at least {@code minDF} is copied,
 * with its count, into the returned multiset.
 *
 * @param index the index the term vectors are read from
 * @param ids the document ids to process
 * @param template the n-gram template; its field selects the term vector
 * @param minDF the minimum count an n-gram needs to be kept
 *              (presumably a document frequency; confirm against {@code add})
 * @return a new multiset containing only the n-grams that met the threshold
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
    // Concurrent multiset because the lambda below runs on a parallel stream.
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector,multiset,template);
    });

    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Parameterized entry type: no raw type and no unchecked downcast needed.
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()){
        Ngram ngram = entry.getElement();
        int count = entry.getCount();
        if (count>=minDF){
            filtered.add(ngram,count);
        }
    }
    return filtered;
}

Example 8

Source File: ApplicationMasterService.java From twill with Apache License 2.0

6 votes

/**
 * Processes completed containers: logs each one, hands it to the running-containers
 * tracker, and re-requests containers for every runnable collected for restart.
 *
 * @param completedContainersStatuses statuses of the containers that completed
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
  // Filled by runningContainers.handleCompleted with the runnables to restart.
  Multiset<String> runnablesToRestart = HashMultiset.create();
  for (YarnContainerStatus containerStatus : completedContainersStatuses) {
    LOG.info("Container {} completed with {}:{}.",
             containerStatus.getContainerId(), containerStatus.getState(),
             containerStatus.getDiagnostics());
    runningContainers.handleCompleted(containerStatus, runnablesToRestart);
  }

  for (Multiset.Entry<String> restart : runnablesToRestart.entrySet()) {
    String runnableName = restart.getElement();
    int instances = restart.getCount();
    LOG.info("Re-request container for {} with {} instances.", runnableName, instances);
    runnableContainerRequests.add(createRunnableContainerRequest(runnableName, instances));
  }

  // For all runnables that need to re-request containers, update the expected count timestamp
  // so that the EventHandler is triggered with the right expiration timestamp.
  expectedContainers.updateRequestTime(runnablesToRestart.elementSet());
}

Example 9

Source File: TagDict.java From easyccg with MIT License

6 votes

/**
 * Stores, under {@code word}, the categories of {@code countForCategory}
 * ordered by the class-level {@code comparator}.
 *
 * @param countForCategory the categories and their counts for the word
 * @param result the dictionary that receives the entry
 * @param word the key under which the ordered categories are stored
 */
private static void addEntryForWord(Multiset<Category> countForCategory,
    Map<String, Collection<Category>> result, String word)
{
  // Snapshot the entries so they can be sorted independently of the multiset.
  List<Entry<Category>> ranked =
      new ArrayList<Entry<Category>>(countForCategory.entrySet());
  Collections.sort(ranked, comparator);

  // Keep only the categories, in sorted order.
  List<Category> ordered = new ArrayList<Category>(ranked.size());
  for (Entry<Category> rankedEntry : ranked) {
    ordered.add(rankedEntry.getElement());
  }

  result.put(word, ordered);
}

Example 10

Source File: JavaInputAstVisitor.java From java-n-IDE-for-Android with Apache License 2.0

6 votes

/**
 * Returns true if at least {@code atLeastM} of the expressions in the given column
 * share the same {@link Tree.Kind}. Rows that are too short to have that column
 * are ignored.
 */
private static boolean expressionsAreParallel(
        List<List<ExpressionTree>> rows, int column, int atLeastM) {
    // Count how often each expression kind appears in the column.
    Multiset<Tree.Kind> kindCounts = HashMultiset.create();
    for (List<? extends ExpressionTree> row : rows) {
        if (column < row.size()) {
            kindCounts.add(row.get(column).getKind());
        }
    }

    for (Multiset.Entry<Tree.Kind> kindCount : kindCounts.entrySet()) {
        if (kindCount.getCount() < atLeastM) {
            continue;
        }
        return true;
    }
    return false;
}

Example 11

Source File: JsonUtil.java From tac2015-event-detection with GNU General Public License v3.0

5 votes

/**
 * Converts a multiset into a JSON object that maps each element's string form
 * to its count.
 *
 * @param counts the multiset to convert
 * @return a new object node with one field per distinct element
 */
public static <T> ObjectNode toJson(Multiset<T> counts) {
	ObjectNode result = newObject();
	for (Multiset.Entry<T> countEntry : counts.entrySet()) {
		String fieldName = countEntry.getElement().toString();
		int occurrences = countEntry.getCount();
		result.put(fieldName, occurrences);
	}
	return result;
}

Example 12

Source File: TypeConformanceComputer.java From xtext-extras with Eclipse Public License 2.0

5 votes

/**
 * Keeps the cumulated distance for all the common raw super types of the given references.
 * Interfaces that are more directly implemented will get a lower total count than more general
 * interfaces.
 *
 * For each reference, its own distances are computed, types missing from them are
 * dropped from {@code cumulatedDistance}, and the distances of the types that remain
 * are added on top of the cumulated counts.
 */
protected void cumulateDistance(final List<LightweightTypeReference> references, Multimap<JvmType, LightweightTypeReference> all,
		Multiset<JvmType> cumulatedDistance) {
	for (LightweightTypeReference reference : references) {
		Multiset<JvmType> referenceDistance = LinkedHashMultiset.create();
		initializeDistance(reference, all, referenceDistance);

		// Drop every type this reference does not share before adding its distances.
		cumulatedDistance.retainAll(referenceDistance);

		for (Multiset.Entry<JvmType> distanceEntry : referenceDistance.entrySet()) {
			JvmType type = distanceEntry.getElement();
			if (!cumulatedDistance.contains(type)) {
				continue;
			}
			cumulatedDistance.add(type, distanceEntry.getCount());
		}
	}
}

Example 13

Source File: RedisBungeeListener.java From RedisBungee with Eclipse Public License 1.0

5 votes

/**
 * Writes a multiset to the output as the number of distinct elements, followed by
 * one (UTF string, int count) pair per distinct element.
 *
 * @param collection the multiset to write
 * @param output the output the data is written to
 */
private void serializeMultiset(Multiset<String> collection, ByteArrayDataOutput output) {
    // Header: how many (element, count) pairs follow.
    int distinctElements = collection.elementSet().size();
    output.writeInt(distinctElements);

    for (Multiset.Entry<String> pair : collection.entrySet()) {
        String element = pair.getElement();
        int occurrences = pair.getCount();
        output.writeUTF(element);
        output.writeInt(occurrences);
    }
}

Example 14

Source File: TableShardingTest.java From mango with Apache License 2.0

5 votes

/**
 * Asserts that the messages returned by the DAO for every uid occurring in
 * {@code msgs} are exactly the given messages, in any order.
 *
 * @param msgs the messages expected to be stored
 */
private void check(List<Msg> msgs) {
  // Collect the uids; the multiset is only used to visit each distinct uid once.
  Multiset<Integer> uidCounts = HashMultiset.create();
  for (Msg msg : msgs) {
    uidCounts.add(msg.getUid());
  }

  List<Msg> fetched = new ArrayList<Msg>();
  for (Multiset.Entry<Integer> uidEntry : uidCounts.entrySet()) {
    Integer uid = uidEntry.getElement();
    fetched.addAll(dao.getMsgs(uid));
  }

  assertThat(fetched, hasSize(msgs.size()));
  assertThat(fetched, containsInAnyOrder(msgs.toArray()));
}

Example 15

Source File: SampleUtils.java From api-mining with GNU General Public License v3.0

5 votes

/**
 * Picks a random element from a Multiset by drawing a random position among all
 * occurrences and returning the element whose cumulative count covers that position.
 * An element is therefore selected with a chance proportional to its count.
 *
 * @param set
 *            the multiset to draw from; must not be null
 * @return the selected element, or null if no entry covered the drawn position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	// Position among all occurrences (size() counts duplicates).
	final int target = RandomUtils.nextInt(checkNotNull(set).size());

	int cumulative = 0;
	for (final Multiset.Entry<T> candidate : set.entrySet()) {
		cumulative += candidate.getCount();
		if (cumulative > target) {
			return candidate.getElement();
		}
	}
	return null;
}

Example 16

Source File: MostFrequentCharInString.java From levelup-java-examples with Apache License 2.0

5 votes

/**
 * Counts the lower-cased characters of {@code sentence} with a Guava multiset,
 * prints every character with its count, and checks the count of "e".
 */
@Test
public void most_frequent_char_guava() throws IOException {

	// Split into one-character strings so each character becomes a multiset element.
	Iterable<String> characters = Splitter.fixedLength(1).split(
			sentence.toLowerCase());
	Multiset<String> charCounts = HashMultiset.create(characters);

	for (Entry<String> charCount : charCounts.entrySet()) {
		System.out.println(charCount.getElement() + ":" + charCount.getCount());
	}

	assertEquals(7, charCounts.count("e"), 0);
}

Example 17

Source File: SampleUtils.java From tassal with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Draws a random element from a Multiset. A random offset below the multiset's
 * total size is chosen and the entries are walked until their counts have used
 * up that offset, so elements with higher counts are proportionally more likely.
 *
 * @param set
 *            the multiset to draw from; must not be null
 * @return the selected element, or null if the walk ends without a selection
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	// Offset still to be consumed; starts at the randomly drawn position.
	int remaining = RandomUtils.nextInt(checkNotNull(set).size());

	for (final Multiset.Entry<T> candidate : set.entrySet()) {
		remaining -= candidate.getCount();
		if (remaining < 0) {
			// This entry's occurrences cover the drawn position.
			return candidate.getElement();
		}
	}
	return null;
}

Example 18

Source File: MultisetExpression.java From gef with Eclipse Public License 2.0

4 votes

/**
 * Returns the entry set of the current multiset value, or the entry set of
 * {@code EMPTY_MULTISET} when no value is present.
 */
@Override
public Set<com.google.common.collect.Multiset.Entry<E>> entrySet() {
	final Multiset<E> current = get();
	if (current == null) {
		return EMPTY_MULTISET.entrySet();
	}
	return current.entrySet();
}

Example 19

Source File: App1.java From pyramid with Apache License 2.0

4 votes

/**
 * Gathers n-grams for every configured combination of field, n and slop, and
 * returns the distinct n-grams that were accepted.
 *
 * For each combination the n-grams are collected via
 * {@code NgramEnumerator.gatherNgram} with a count threshold of
 * {@code floor(ids.length * minDf)}. Each n-gram is marked with the configured
 * in-order flag and added to the overall multiset when {@code interesting}
 * accepts it and, unless duplicate words are allowed, it has no duplicate.
 *
 * @param config the configuration supplying the output folder and n-gram settings
 * @param index the index the n-grams are gathered from
 * @param ids the document ids to process
 * @param logger the logger receiving progress messages
 * @return the distinct n-grams accepted over all combinations
 * @throws Exception declared by the original signature
 */
static Set<Ngram> gather(Config config, ESIndex index,
                             String[] ids, Logger logger) throws Exception{

    // The folder is still created even though the code writing into it is disabled below.
    File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    // Relative threshold converted to an absolute count over the given ids.
    int minDFrequency = (int)Math.floor(ids.length*minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");

    for (String field: fields){
        for (int n: ns){
            for (int slop:slops){
                logger.info("gathering "+n+ "-grams from field "+field+" with slop "+slop+" and minDf "+minDf+ ", (actual frequency threshold = "+minDFrequency+")");
                NgramTemplate template = new NgramTemplate(field,n,slop);
                Multiset<Ngram> gathered = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered "+gathered.elementSet().size()+ " ngrams");

                int accepted = 0;
                for (Multiset.Entry<Ngram> gatheredEntry: gathered.entrySet()){
                    Ngram ngram = gatheredEntry.getElement();
                    ngram.setInOrder(inorder);
                    int count = gatheredEntry.getCount();
                    if (!interesting(allNgrams,ngram,count)){
                        continue;
                    }
                    // hasDuplicate() is only consulted when duplicate words are not allowed.
                    if (allowDuplicates || !ngram.hasDuplicate()){
                        allNgrams.add(ngram, count);
                        accepted += 1;
                    }
                }
                logger.info(accepted+" are really new");
            }
        }
    }
    logger.info("there are "+allNgrams.elementSet().size()+" ngrams in total");
//    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(metaDataFolder,"all_ngrams.txt")));
//    for (Multiset.Entry<Ngram> ngramEntry: allNgrams.entrySet()){
//        bufferedWriter.write(ngramEntry.getElement().toString());
//        bufferedWriter.write("\t");
//        bufferedWriter.write(""+ngramEntry.getCount());
//        bufferedWriter.newLine();
//    }
//
//    bufferedWriter.close();
//
//    //for serialization
//    Set<Ngram> uniques = new HashSet<>();
//    uniques.addAll(allNgrams.elementSet());
//    Serialization.serialize(uniques, new File(metaDataFolder, "all_ngrams.ser"));
    return allNgrams.elementSet();
}

Example 20

Source File: TagDict.java From easyccg with MIT License

4 votes

/**
 * Finds the set of categories used for each word in a corpus.
 *
 * Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own
 * entry; the category counts of all other words are pooled under {@code OTHER_WORDS}.
 *
 * @param input the sentences providing input words and their gold categories
 * @return an immutable copy of the word-to-categories dictionary
 */
public static Map<String, Collection<Category>> makeDict(Iterable<InputToParser> input) {
  Multiset<String> wordCounts = HashMultiset.create();
  Map<String, Multiset<Category>> wordToCatToCount = new HashMap<String, Multiset<Category>>();

  // Pass 1: count each word, and how often it occurs with each category.
  for (InputToParser sentence : input) {
    for (int i=0; i<sentence.getInputWords().size(); i++) {
      String word = sentence.getInputWords().get(i).word;
      Category cat = sentence.getGoldCategories().get(i);
      wordCounts.add(word);

      // Create the per-word category counter on first sight of the word.
      Multiset<Category> catCounts = wordToCatToCount.get(word);
      if (catCounts == null) {
        catCounts = HashMultiset.create();
        wordToCatToCount.put(word, catCounts);
      }
      catCounts.add(cat);
    }
  }

  // Pass 2: build the dictionary entries.
  Multiset<Category> rareWordCatCounts = HashMultiset.create();
  Map<String, Collection<Category>> dict = new HashMap<String, Collection<Category>>();

  for (Entry<String> wordEntry : wordCounts.entrySet()) {
    String word = wordEntry.getElement();
    Multiset<Category> catCounts = wordToCatToCount.get(word);

    if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
      // Rare word: pool its statistics with those of all other rare words.
      for (Entry<Category> catEntry : catCounts.entrySet()) {
        rareWordCatCounts.add(catEntry.getElement(), catEntry.getCount());
      }
      continue;
    }

    // Frequent word: gets an entry of its own.
    addEntryForWord(catCounts, dict, word);
  }
  addEntryForWord(rareWordCatCounts, dict, OTHER_WORDS);

  return ImmutableMap.copyOf(dict);
}