Java Code Examples for com.google.common.collect.Multiset#entrySet()

The following examples show how to use com.google.common.collect.Multiset#entrySet(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out related API usage in the sidebar.
Example 1
Source File: SpecValidations.java    From deploymentmanager-autogen with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that no metadata key is used more than once across the spec's password entries
 * and GCE metadata items.
 *
 * @param spec the deployment package spec whose metadata keys are checked
 * @throws IllegalArgumentException if any metadata key occurs more than once
 */
private static void validateMetadataKeyUniqueness(SingleVmDeploymentPackageSpec spec) {
  // Tally the keys from both sources in one multiset so clashes between them are caught too.
  Multiset<String> keyTally = HashMultiset.create();
  for (PasswordSpec passwordSpec : spec.getPasswordsList()) {
    keyTally.add(passwordSpec.getMetadataKey());
  }
  for (GceMetadataItem item : spec.getGceMetadataItemsList()) {
    keyTally.add(item.getKey());
  }

  for (Multiset.Entry<String> tally : keyTally.entrySet()) {
    if (tally.getCount() <= 1) {
      continue;
    }
    String message = String.format("Metadata key '%s' is not unique", tally.getElement());
    throw new IllegalArgumentException(message);
  }
}
 
Example 2
Source File: AbstractIdentifierRenamings.java    From naturalize with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds one {@link Renaming} per candidate name in {@code alternatives}.
 *
 * For each candidate, every n-gram has its wildcard token replaced by the candidate, is
 * scored, and the log2 of that score (weighted by the n-gram's count) is accumulated. A
 * failure while scoring a single n-gram is logged and that n-gram contributes nothing.
 *
 * @param ngrams       the n-grams (with counts) surrounding the identifier
 * @param alternatives the candidate identifier names
 * @param scope        the scope passed on to the priors and to each {@link Renaming}
 * @return the renamings, in the natural order of the returned tree set
 */
@Override
public SortedSet<Renaming> calculateScores(
		final Multiset<NGram<String>> ngrams,
		final Set<String> alternatives, final Scope scope) {
	final SortedSet<Renaming> renamings = Sets.newTreeSet();

	for (final String candidate : alternatives) {
		double logScoreSum = 0;
		for (final Entry<NGram<String>> entry : ngrams.entrySet()) {
			try {
				final NGram<String> substituted = NGram.substituteTokenWith(
						entry.getElement(), WILDCARD_TOKEN, candidate);
				logScoreSum += DoubleMath.log2(scoreNgram(substituted))
						* entry.getCount();
			} catch (final Throwable e) {
				LOGGER.warning(ExceptionUtils.getFullStackTrace(e));
			}
		}

		// Prior minus accumulated log score, normalised by the total number of n-grams.
		final double prior = addScopePriors(candidate, scope);
		final double normalisedScore = (prior - logScoreSum) / ngrams.size();
		renamings.add(new Renaming(candidate, normalisedScore,
				ngrams.size() / ngramLM.getN(), scope));
	}

	return renamings;
}
 
Example 3
Source File: CorpusAnalysis.java    From tac-kbp-eal with MIT License 6 votes vote down vote up
/**
 * Draws a bar chart with one bar per distinct symbol in {@code counts} (bar height is the
 * symbol's count, bar label is its string form) and hands it to {@code renderer}.
 *
 * @param counts     the symbol counts to plot
 * @param outFile    the file the renderer writes to
 * @param renderer   the renderer used to produce the chart
 * @param chartTitle the chart title
 * @param xAxisLabel the label of the (rotated-label) x axis
 * @param yAxisLabel the label of the y axis
 * @throws IOException declared by the method; presumably propagated from the renderer
 */
static void writeToChart(final Multiset<Symbol> counts, final File outFile,
    final GnuPlotRenderer renderer,
    final String chartTitle, final String xAxisLabel, final String yAxisLabel)
    throws IOException {

  final Axis xAxis = Axis.xAxis()
      .setLabel(xAxisLabel)
      .rotateLabels()
      .build();
  final Axis yAxis = Axis.yAxis()
      .setLabel(yAxisLabel)
      .build();

  final BarChart.Builder bars = BarChart.builder()
      .setTitle(chartTitle)
      .setXAxis(xAxis)
      .setYAxis(yAxis)
      .hideKey();

  for (final Multiset.Entry<Symbol> entry : counts.entrySet()) {
    bars.addBar(
        BarChart.Bar.builder(entry.getCount())
            .setLabel(entry.getElement().toString())
            .build());
  }

  renderer.renderTo(bars.build(), outFile);
}
 
Example 4
Source File: ValueTypeComposer.java    From immutables with Apache License 2.0 6 votes vote down vote up
/**
 * Reports an error on {@code protoclass} when two attributes of {@code type} share a name.
 * Lazy attributes are only compared against other lazy attributes.
 *
 * @param type       the value type whose attributes are inspected
 * @param protoclass the protoclass used for error reporting
 */
private void checkAttributeNamesForDuplicates(ValueType type, Protoclass protoclass) {
  if (type.attributes.isEmpty()) {
    return;
  }

  Multiset<String> nameTally = HashMultiset.create(type.attributes.size());
  for (ValueAttribute attribute : type.attributes) {
    // The "$lazy" suffix keeps lazy attributes in their own comparison scope.
    String key = attribute.isGenerateLazy
        ? attribute.name() + "$lazy"
        : attribute.name();
    nameTally.add(key);
  }

  List<String> repeated = Lists.newArrayList();
  for (Multiset.Entry<String> tally : nameTally.entrySet()) {
    if (tally.getCount() > 1) {
      // Strip the scope marker again before showing the name to the user.
      repeated.add(tally.getElement().replace("$lazy", ""));
    }
  }

  if (repeated.isEmpty()) {
    return;
  }
  protoclass.report()
      .error("Duplicate attribute names %s. You should check if correct @Value.Style applied",
          repeated);
}
 
Example 5
Source File: TagDict.java    From EasySRL with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the word-to-categories dictionary from precomputed counts.
 *
 * Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own entry; the
 * category counts of all remaining words are pooled and stored under {@code OTHER_WORDS}.
 *
 * @param wordCounts       how often each word occurs
 * @param wordToCatToCount per-word category counts
 * @return an immutable copy of the resulting dictionary
 */
private static Map<String, Collection<Category>> makeDict(final Multiset<String> wordCounts,
		final Map<String, Multiset<Category>> wordToCatToCount) {
	final Map<String, Collection<Category>> dict = new HashMap<>();
	// Pooled category counts for every word that is not frequent enough.
	final Multiset<Category> pooledRareCounts = HashMultiset.create();

	for (final Entry<String> wordEntry : wordCounts.entrySet()) {
		final String word = wordEntry.getElement();
		final Multiset<Category> categoryCounts = wordToCatToCount.get(word);

		if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
			// Rare word: fold its statistics into the shared pool.
			for (final Entry<Category> categoryEntry : categoryCounts.entrySet()) {
				pooledRareCounts.add(categoryEntry.getElement(), categoryEntry.getCount());
			}
		} else {
			// Frequent word: gets a dictionary entry of its own.
			addEntryForWord(categoryCounts, dict, word);
		}
	}
	addEntryForWord(pooledRareCounts, dict, OTHER_WORDS);

	return ImmutableMap.copyOf(dict);
}
 
Example 6
Source File: EntityScorer.java    From entity-fishing with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a scorer context from a list of words.
 *
 * The words are counted in a sorted multiset. For each distinct word that has an embedding
 * in {@code kb}, the embedding is appended to a flat float array and the word's count is
 * recorded. Words without an embedding are skipped.
 *
 * @param words the words to build the context from
 * @return the context created from the concatenated vectors and the matching counts
 */
public ScorerContext context(List<String> words) {
    Multiset<String> counter = TreeMultiset.create();
    counter.addAll(words);

    int word_dim = kb.getEmbeddingsSize();
    // Sized by the total number of words (duplicates included); only one vector per
    // distinct word is written below, so any trailing slots keep their default 0.0f.
    float[] word_vecs = new float[counter.size() * word_dim];
    IntArrayList word_counts = new IntArrayList();
    int n_words = 0;

    for (Multiset.Entry<String> entry : counter.entrySet()) {
        short[] vector = kb.getWordEmbeddings(entry.getElement());
        if (vector == null) {
            continue;
        }
        word_counts.add(entry.getCount());
        // Start of this word's slice inside the flat array.
        int offset = n_words * word_dim;
        for (int i = 0; i < kb.getEmbeddingsSize(); i++) {
            word_vecs[offset + i] = vector[i];
        }
        n_words++;
    }
    word_counts.trim();

    return create_context(word_vecs, word_counts.elements());
}
 
Example 7
Source File: NgramEnumerator.java    From pyramid with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the n-grams matching {@code template} from the given documents and keeps only
 * those whose accumulated count is at least {@code minDF}.
 *
 * The documents are processed in parallel; each document's term vector for the template's
 * field is fetched from the index and passed to {@code add}, which fills a concurrent
 * multiset.
 *
 * @param index    the index the term vectors are read from
 * @param ids      the ids of the documents to scan
 * @param template the n-gram template (field and shape of the n-grams)
 * @param minDF    the minimum count an n-gram needs in order to be kept
 * @return a new multiset holding every n-gram with count {@code >= minDF}, with its count
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF){
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer,String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector,multiset,template);
    });
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Typed entries: no raw Multiset.Entry and therefore no downcast to Ngram.
    for (Multiset.Entry<Ngram> entry: multiset.entrySet()){
        int count = entry.getCount();
        if (count>=minDF){
            filtered.add(entry.getElement(),count);
        }
    }
    return filtered;
}
 
Example 8
Source File: ApplicationMasterService.java    From twill with Apache License 2.0 6 votes vote down vote up
/**
 * Processes the statuses of completed containers: logs each one, lets the running-containers
 * tracker handle it, and re-requests containers for every runnable that was collected for
 * restart.
 */
private void handleCompleted(List<YarnContainerStatus> completedContainersStatuses) {
  // Filled by runningContainers.handleCompleted with the runnables that need new containers.
  Multiset<String> toRestart = HashMultiset.create();
  for (YarnContainerStatus completed : completedContainersStatuses) {
    LOG.info("Container {} completed with {}:{}.",
             completed.getContainerId(), completed.getState(), completed.getDiagnostics());
    runningContainers.handleCompleted(completed, toRestart);
  }

  for (Multiset.Entry<String> restart : toRestart.entrySet()) {
    String runnableName = restart.getElement();
    int instances = restart.getCount();
    LOG.info("Re-request container for {} with {} instances.", runnableName, instances);
    runnableContainerRequests.add(createRunnableContainerRequest(runnableName, instances));
  }

  // Refresh the request time of every runnable being re-requested so that the EventHandler
  // is triggered with the right expiration timestamp.
  expectedContainers.updateRequestTime(toRestart.elementSet());
}
 
Example 9
Source File: TagDict.java    From easyccg with MIT License 6 votes vote down vote up
/**
 * Stores under {@code word} the categories of {@code countForCategory}, ordered by the
 * class-level {@code comparator} applied to the multiset entries.
 */
private static void addEntryForWord(Multiset<Category> countForCategory,
    Map<String, Collection<Category>> result, String word)
{
  // Snapshot the entries into a list so they can be sorted.
  List<Entry<Category>> ranked = new ArrayList<Entry<Category>>(countForCategory.entrySet());
  Collections.sort(ranked, comparator);

  // Keep only the categories, in sorted order.
  List<Category> ordered = new ArrayList<Category>();
  for (Entry<Category> rankedEntry : ranked) {
    ordered.add(rankedEntry.getElement());
  }

  result.put(word, ordered);
}
 
Example 10
Source File: JavaInputAstVisitor.java    From java-n-IDE-for-Android with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether at least {@code atLeastM} expressions in the given column share the same
 * {@link Tree.Kind}. Rows too short to have that column are ignored.
 */
private static boolean expressionsAreParallel(
        List<List<ExpressionTree>> rows, int column, int atLeastM) {
    Multiset<Tree.Kind> kindTally = HashMultiset.create();
    for (List<? extends ExpressionTree> row : rows) {
        if (column < row.size()) {
            kindTally.add(row.get(column).getKind());
        }
    }

    boolean parallel = false;
    for (Multiset.Entry<Tree.Kind> tally : kindTally.entrySet()) {
        if (tally.getCount() >= atLeastM) {
            parallel = true;
            break;
        }
    }
    return parallel;
}
 
Example 11
Source File: JsonUtil.java    From tac2015-event-detection with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts a multiset into a JSON object that maps each element's string form to its count.
 *
 * @param counts the multiset to convert
 * @return a new object node with one field per distinct element
 */
public static <T> ObjectNode toJson(Multiset<T> counts) {
	ObjectNode json = newObject();
	for (Multiset.Entry<T> entry : counts.entrySet()) {
		String fieldName = entry.getElement().toString();
		json.put(fieldName, entry.getCount());
	}
	return json;
}
 
Example 12
Source File: TypeConformanceComputer.java    From xtext-extras with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Keeps the cumulated distance for all the common raw super types of the given references.
 * Interfaces that are more directly implemented will get a lower total count than more general
 * interfaces.
 *
 * @param references the references whose distances are folded into {@code cumulatedDistance}
 * @param all passed through to {@code initializeDistance} for every reference
 * @param cumulatedDistance the running totals; modified in place
 */
protected void cumulateDistance(final List<LightweightTypeReference> references, Multimap<JvmType, LightweightTypeReference> all,
		Multiset<JvmType> cumulatedDistance) {
	for(LightweightTypeReference other: references) {
		// Distances for this single reference, filled in by initializeDistance.
		Multiset<JvmType> otherDistance = LinkedHashMultiset.create();
		initializeDistance(other, all, otherDistance);
		// Drop every type that this reference does not have: only types common to
		// all references processed so far remain in the running totals.
		cumulatedDistance.retainAll(otherDistance);
		for(Multiset.Entry<JvmType> typeToDistance: otherDistance.entrySet()) {
			// Only add to types that are still present; a type that is missing from
			// the running totals is never (re-)introduced here.
			if (cumulatedDistance.contains(typeToDistance.getElement()))
				cumulatedDistance.add(typeToDistance.getElement(), typeToDistance.getCount());
		}
	}
}
 
Example 13
Source File: RedisBungeeListener.java    From RedisBungee with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Writes a multiset to {@code output}: first the number of distinct elements, then for each
 * distinct element its UTF string followed by its count.
 */
private void serializeMultiset(Multiset<String> collection, ByteArrayDataOutput output) {
    int distinctElements = collection.elementSet().size();
    output.writeInt(distinctElements);

    for (Multiset.Entry<String> item : collection.entrySet()) {
        String element = item.getElement();
        int occurrences = item.getCount();
        output.writeUTF(element);
        output.writeInt(occurrences);
    }
}
 
Example 14
Source File: TableShardingTest.java    From mango with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the messages stored for the uids occurring in {@code msgs} are exactly the
 * given messages, in any order. Each distinct uid is queried once.
 */
private void check(List<Msg> msgs) {
  // Collect the uids; duplicates collapse to a single distinct element.
  Multiset<Integer> uids = HashMultiset.create();
  for (Msg msg : msgs) {
    uids.add(msg.getUid());
  }

  List<Msg> stored = new ArrayList<Msg>();
  for (Integer uid : uids.elementSet()) {
    stored.addAll(dao.getMsgs(uid));
  }

  assertThat(stored, hasSize(msgs.size()));
  assertThat(stored, containsInAnyOrder(msgs.toArray()));
}
 
Example 15
Source File: SampleUtils.java    From api-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Picks a random element from a multiset by drawing a random position among all
 * occurrences, so an element with a higher count is proportionally more likely to be
 * chosen.
 * 
 * @param set
 *            the multiset to draw from; must not be null
 * @return the element covering the drawn position, or {@code null} if no entry covers it
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	final int target = RandomUtils.nextInt(checkNotNull(set).size());

	// Walk the entries, accumulating counts until the running total passes the target.
	int seen = 0;
	for (final Multiset.Entry<T> candidate : set.entrySet()) {
		seen += candidate.getCount();
		if (seen > target) {
			return candidate.getElement();
		}
	}
	return null;
}
 
Example 16
Source File: MostFrequentCharInString.java    From levelup-java-examples with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the characters of the lower-cased sentence with a Guava multiset, prints each
 * character with its count, and checks the count of "e".
 */
@Test
public void most_frequent_char_guava() throws IOException {

	// Split the sentence into one-character strings and tally them.
	Iterable<String> singleChars = Splitter.fixedLength(1).split(
			sentence.toLowerCase());
	Multiset<String> charTally = HashMultiset.create(singleChars);

	for (Entry<String> tally : charTally.entrySet()) {
		System.out.println(tally.getElement() + ":" + tally.getCount());
	}

	assertEquals(7, charTally.count("e"), 0);
}
 
Example 17
Source File: SampleUtils.java    From tassal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Draws a random occurrence from the multiset and returns the element it belongs to;
 * elements are therefore selected in proportion to their counts.
 * 
 * @param set
 *            the multiset to sample from; must not be null
 * @return the sampled element, or {@code null} if no entry covers the drawn position
 */
public static <T> T getRandomElement(final Multiset<T> set) {
	final int drawnPosition = RandomUtils.nextInt(checkNotNull(set).size());

	// The drawn position belongs to the first entry whose cumulative count exceeds it.
	int cumulativeCount = 0;
	for (final Multiset.Entry<T> current : set.entrySet()) {
		cumulativeCount += current.getCount();
		if (cumulativeCount > drawnPosition) {
			return current.getElement();
		}
	}
	return null;
}
 
Example 18
Source File: MultisetExpression.java    From gef with Eclipse Public License 2.0 4 votes vote down vote up
/**
 * Returns the entry set of the multiset obtained from {@code get()}, or the entry set of
 * {@code EMPTY_MULTISET} when {@code get()} yields {@code null}.
 */
@Override
public Set<com.google.common.collect.Multiset.Entry<E>> entrySet() {
	final Multiset<E> current = get();
	return (current != null) ? current.entrySet()
			: EMPTY_MULTISET.entrySet();
}
 
Example 19
Source File: App1.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Gathers n-grams for every configured combination of extraction field, n-gram length and
 * slop, and returns the distinct n-grams that were accepted.
 *
 * The document-frequency threshold is {@code floor(ids.length * minDf)}. An n-gram returned
 * by {@code NgramEnumerator.gatherNgram} is accepted when {@code interesting} approves it
 * and, unless duplicate words are allowed by the configuration, it has no duplicate.
 *
 * @param config the configuration providing the output folder and n-gram settings
 * @param index  the index the n-grams are gathered from
 * @param ids    the ids of the documents to scan
 * @param logger the logger used for progress messages
 * @return the distinct accepted n-grams (the element set of the internal multiset)
 * @throws Exception declared by the method signature
 */
static Set<Ngram> gather(Config config, ESIndex index,
                         String[] ids, Logger logger) throws Exception{

    // Make sure the meta_data output folder exists; this method writes nothing into it.
    File metaDataFolder = new File(config.getString("output.folder"),"meta_data");
    metaDataFolder.mkdirs();

    Multiset<Ngram> allNgrams = ConcurrentHashMultiset.create();
    List<Integer> ns = config.getIntegers("train.feature.ngram.n");
    double minDf = config.getDouble("train.feature.ngram.minDf");
    int minDFrequency = (int)Math.floor(ids.length*minDf);
    List<String> fields = config.getStrings("train.feature.ngram.extractionFields");
    List<Integer> slops = config.getIntegers("train.feature.ngram.slop");
    boolean inorder = config.getBoolean("train.feature.ngram.inOrder");
    boolean allowDuplicates = config.getBoolean("train.feature.ngram.allowDuplicateWords");
    for (String field: fields){
        for (int n: ns){
            for (int slop:slops){
                logger.info("gathering "+n+ "-grams from field "+field+" with slop "+slop+" and minDf "+minDf+ ", (actual frequency threshold = "+minDFrequency+")");
                NgramTemplate template = new NgramTemplate(field,n,slop);
                Multiset<Ngram> ngrams = NgramEnumerator.gatherNgram(index, ids, template, minDFrequency);
                logger.info("gathered "+ngrams.elementSet().size()+ " ngrams");
                int newCounter = 0;
                for (Multiset.Entry<Ngram> entry: ngrams.entrySet()){
                    Ngram ngram = entry.getElement();
                    ngram.setInOrder(inorder);
                    int count = entry.getCount();
                    // Single acceptance rule: must be interesting, and must be free of
                    // duplicate words unless duplicates are explicitly allowed.
                    if (interesting(allNgrams,ngram,count)
                            && (allowDuplicates || !ngram.hasDuplicate())){
                        allNgrams.add(ngram, count);
                        newCounter += 1;
                    }
                }
                logger.info(newCounter+" are really new");
            }
        }
    }
    logger.info("there are "+allNgrams.elementSet().size()+" ngrams in total");
    return allNgrams.elementSet();
}
 
Example 20
Source File: TagDict.java    From easyccg with MIT License 4 votes vote down vote up
/**
 * Builds a dictionary from each word in a corpus to the categories it was seen with.
 *
 * Words occurring more than {@code MIN_OCCURENCES_OF_WORD} times get their own entry; the
 * category counts of all other words are pooled and stored under {@code OTHER_WORDS}.
 */
public static Map<String, Collection<Category>> makeDict(Iterable<InputToParser> input) {
  Multiset<String> wordCounts = HashMultiset.create();
  Map<String, Multiset<Category>> wordToCatToCount = new HashMap<String, Multiset<Category>>();

  // Pass 1: tally each word, and each category per word.
  for (InputToParser sentence : input) {
    for (int i=0; i<sentence.getInputWords().size(); i++) {
      String word = sentence.getInputWords().get(i).word;
      Category cat = sentence.getGoldCategories().get(i);
      wordCounts.add(word);

      Multiset<Category> catCounts = wordToCatToCount.get(word);
      if (catCounts == null) {
        // First time this word is seen: start an empty tally for it.
        catCounts = HashMultiset.create();
        wordToCatToCount.put(word, catCounts);
      }
      catCounts.add(cat);
    }
  }

  // Pass 2: turn the tallies into dictionary entries.
  Map<String, Collection<Category>> dict = new HashMap<String, Collection<Category>>();
  Multiset<Category> pooledRareCounts = HashMultiset.create();

  for (Entry<String> wordEntry : wordCounts.entrySet()) {
    String word = wordEntry.getElement();
    Multiset<Category> catCounts = wordToCatToCount.get(word);

    if (wordEntry.getCount() <= MIN_OCCURENCES_OF_WORD) {
      // Rare word: fold its statistics into the shared pool.
      for (Entry<Category> catEntry : catCounts.entrySet()) {
        pooledRareCounts.add(catEntry.getElement(), catEntry.getCount());
      }
    } else {
      // Frequent word: gets an entry of its own.
      addEntryForWord(catCounts, dict, word);
    }
  }
  addEntryForWord(pooledRareCounts, dict, OTHER_WORDS);

  return ImmutableMap.copyOf(dict);
}