it.unimi.dsi.fastutil.objects.Object2IntMap Java Examples

The following examples show how to use it.unimi.dsi.fastutil.objects.Object2IntMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReadCountCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Rearranges the targets so that they follow a particular order.
 * <p>
 * The count rows are copied into a new matrix following {@code targetsInOrder};
 * the matrix held by this collection is only read.
 * </p>
 * @param targetsInOrder the targets in the requested output order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());
    // Index the current targets so each requested target resolves to its source row in one lookup.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }
    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Check for null before the membership test: its message supplier dereferences the
        // target, so a null element would otherwise surface as a NullPointerException
        // rather than through the argument validation used for the other documented cases.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
 
Example #2
Source File: AtlasApplicationWriter.java    From atlas with Apache License 2.0 6 votes vote down vote up
/**
 * When {@code AtlasD8.deepShrink} is set, drops debug-related data reachable from the
 * given writer: the "debugInfos" map is cleared, every class' "sourceFile" field is set
 * to {@code null}, and the debug info of every code item is set to {@code null}.
 * Does nothing when the flag is off.
 *
 * @param fileWriter the writer whose private fields are read reflectively.
 */
private void processFileWriter(FileWriter fileWriter) {
    if (!AtlasD8.deepShrink) {
        return;
    }

    System.out.println("start to deepShrink of dx");

    // The fields below are reached via reflection by name.
    final Object offsets = ReflectUtils.getField(fileWriter, "mixedSectionOffsets");
    final ObjectToOffsetMapping offsetMapping = (ObjectToOffsetMapping) ReflectUtils.getField(fileWriter, "mapping");
    final Object2IntMap<DexDebugInfo> debugInfos = (Object2IntMap<DexDebugInfo>) ReflectUtils.getField(offsets, "debugInfos");
    final Reference2IntMap<DexCode> codes = (Reference2IntMap<DexCode>) ReflectUtils.getField(offsets, "codes");

    debugInfos.clear();
    for (DexProgramClass programClass : offsetMapping.getClasses()) {
        ReflectUtils.updateField(programClass, "sourceFile", null);
    }
    for (DexCode code : codes.keySet()) {
        code.setDebugInfo(null);
    }

    System.out.println("end to deepShrink of dx");
}
 
Example #3
Source File: PTCM.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Splits the posting list into consecutive partitions of at most {@code partSize} elements.
 * <p>
 * When {@code partSize} is zero or negative, or not smaller than the posting list,
 * the result holds a single partition: the posting list itself.
 * Otherwise the partitions are sub-list views; only the last one may be shorter.
 * </p>
 * @param partSize the size of the partitions.
 * @return a list of word vectors.
 */
public ObjectList<ObjectList<Object2IntMap<String>>> partition(int partSize){

	final ObjectList<ObjectList<Object2IntMap<String>>> partitions = new ObjectArrayList<ObjectList<Object2IntMap<String>>>();
	final int total = this.postingList.size();

	// Degenerate sizes: one partition covering everything (the full tweet centroid model).
	if (partSize <= 0 || partSize >= total) {
		partitions.add(this.postingList);
		return partitions;
	}

	// Walk the list in steps of partSize; the final step is clipped to the list end.
	for (int start = 0; start < total; start += partSize) {
		final int end = Math.min(start + partSize, total);
		partitions.add(this.postingList.subList(start, end));
	}

	return partitions;
}
 
Example #4
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Appends the given evidence to a sample, skipping elements already present in the
 * sample's evidence index, and then refreshes the sample's evidence count.
 *
 * @param newSampleEvidence the evidence to append.
 * @param sampleIndex the index of the target sample.
 */
private void appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) {

    final List<EVIDENCE> existing = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> indexByEvidence = evidenceIndexBySampleIndex(sampleIndex);

    for (final EVIDENCE candidate : newSampleEvidence) {
        // Optimistically register the candidate at the next free position; the value
        // returned by put tells whether it was already indexed.
        final int nextPosition = existing.size();
        final int displaced = indexByEvidence.put(candidate, nextPosition);
        if (displaced != MISSING_INDEX) {
            // Already known: restore the position it had before.
            indexByEvidence.put(candidate, displaced);
            continue;
        }
        existing.add(candidate);
    }

    numberOfEvidences[sampleIndex] = existing.size();
}
 
Example #5
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Replaces evidence objects in place, for every sample, according to a replacement map.
 * Evidence without a (non-null) replacement is left untouched. When a sample has an
 * evidence index, the index is updated to point the replacement at the same position.
 *
 * @param evidenceReplacements maps each evidence to be replaced to its replacement.
 */
public void changeEvidence(final Map<EVIDENCE, EVIDENCE> evidenceReplacements) {
    for (int sample = 0, sampleTotal = samples.numberOfSamples(); sample < sampleTotal; sample++) {
        final List<EVIDENCE> current = evidenceBySampleIndex.get(sample);
        final Object2IntMap<EVIDENCE> index = evidenceIndexBySampleIndex.get(sample);
        final boolean indexed = index != null;
        for (int position = 0, count = current.size(); position < count; position++) {
            final EVIDENCE original = current.get(position);
            final EVIDENCE substitute = evidenceReplacements.get(original);
            if (substitute != null) {
                current.set(position, substitute);
                if (indexed) {
                    // Keep the index consistent: same position, new key.
                    index.remove(original);
                    index.put(substitute, position);
                }
            }
        }
    }
}
 
Example #6
Source File: DistantSupervisionSyntheticFilter.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a tweet vector from a list of tokens.
 * Word attributes are included when {@code createWordAtts} is set and cluster
 * attributes when {@code createClustAtts} is set; both are merged into one map.
 * @param tokens a tokenized tweet
 * @return a mapping between attribute names and values
 */
public Object2IntMap<String> calculateDocVec(List<String> tokens) {
	final Object2IntMap<String> vector = new Object2IntOpenHashMap<String>();

	// Word-based part of the vector.
	if (this.createWordAtts) {
		vector.putAll(affective.core.Utils.calculateTermFreq(tokens, UNIPREFIX, this.freqWeights));
	}

	// Cluster-based part: map the tokens to clusters first, then count those.
	if (this.createClustAtts) {
		final List<String> clusters = affective.core.Utils.clustList(tokens, brownDict);
		vector.putAll(affective.core.Utils.calculateTermFreq(clusters, CLUSTPREFIX, this.freqWeights));
	}

	return vector;
}
 
Example #7
Source File: TweetCentroid.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Computes the attribute vector of a tokenized tweet.
 * The unigram attributes (if {@code createWordAtts}) and the cluster attributes
 * (if {@code createClustAtts}) are accumulated into a single map.
 * @param tokens a tokenized tweet
 * @return a mapping between attribute names and values
 */
public Object2IntMap<String> calculateDocVec(List<String> tokens) {
	final Object2IntMap<String> result = new Object2IntOpenHashMap<String>();

	if (this.createWordAtts) {
		// Attributes derived directly from the tokens.
		result.putAll(affective.core.Utils.calculateTermFreq(tokens, UNIPREFIX, this.freqWeights));
	}

	if (this.createClustAtts) {
		// Attributes derived from the clusters the tokens map to.
		final List<String> clusterTokens = affective.core.Utils.clustList(tokens, brownDict);
		result.putAll(affective.core.Utils.calculateTermFreq(clusterTokens, CLUSTPREFIX, this.freqWeights));
	}

	return result;
}
 
Example #8
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0 6 votes vote down vote up
/**
 * Trains a model from the given CASes and stores the model, the compiled tagset and
 * {@code randUnk} in the recommender context.
 *
 * @param aContext the context receiving the training results.
 * @param aCasses the CASes to extract training data from.
 * @throws IllegalStateException if an {@link IOException} occurs during training.
 */
@Override
public void train(RecommenderContext aContext, List<CAS> aCasses)
{
    try {
        ensureEmbeddingsAreAvailable();

        // Extract the training data from the CASes
        final List<Sample> samples = extractData(aCasses, true);

        // Mapping from labels to numeric label IDs, i.e. which index in the label vector
        // represents which label. It is handed to the training step and compiled afterwards.
        final Object2IntMap<String> labelIds = new Object2IntOpenHashMap<>();

        // Use the training data to train the network
        final MultiLayerNetwork trained = train(samples, labelIds);

        aContext.put(KEY_MODEL, trained);
        aContext.put(KEY_TAGSET, compileTagset(labelIds));
        aContext.put(KEY_UNKNOWN, randUnk);
    }
    catch (IOException e) {
        throw new IllegalStateException("Unable to train model", e);
    }
}
 
Example #9
Source File: DioriteRandomUtils.java    From Diorite with MIT License 6 votes vote down vote up
/**
 * Picks a key from the map, using the int values as weights.
 * The weights are summed, a random number is drawn via {@code getRandomLong} with
 * bounds 0 and that sum, and the entries are walked subtracting each weight until
 * the running value drops below zero.
 *
 * @param random the random source passed to {@code getRandomLong}.
 * @param choices the candidate keys mapped to their weights.
 * @return the selected key, or {@code null} if no entry brought the value below zero.
 */
@Nullable
public static <T> T getWeightedRandom(Random random, Object2IntMap<T> choices)
{
    long totalWeight = 0;
    IntIterator weights = choices.values().iterator();
    while (weights.hasNext())
    {
        totalWeight += weights.nextInt();
    }

    long remaining = getRandomLong(random, 0, totalWeight);
    for (Object2IntMap.Entry<T> candidate : choices.object2IntEntrySet())
    {
        remaining -= candidate.getIntValue();
        if (remaining >= 0)
        {
            continue;
        }
        return candidate.getKey();
    }
    return null;
}
 
Example #10
Source File: ASTAssociation.java    From symja_android_library with GNU General Public License v3.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Runs only while the {@code BUILT_IN_EVALED} flag is off, and sets the flag on entry.
 * Entries mapped to a positive index have their value evaluated; a copy of this
 * association is created lazily on the first value that evaluates to something present.
 * </p>
 */
@Override
public IExpr evaluate(EvalEngine engine) {
	if (!isEvalFlagOff(IAST.BUILT_IN_EVALED)) {
		return F.NIL;
	}
	addEvalFlags(IAST.BUILT_IN_EVALED);

	ASTAssociation evaluated = null;
	for (Object2IntMap.Entry<IExpr> entry : map.object2IntEntrySet()) {
		final int position = entry.getIntValue();
		if (position <= 0) {
			// for Rules eval rhs / for RuleDelayed don't
			continue;
		}
		final IExpr newValue = engine.evaluateNull(get(position));
		if (!newValue.isPresent()) {
			continue;
		}
		if (evaluated == null) {
			evaluated = copy();
		}
		evaluated.set(position, newValue);
	}

	if (evaluated == null) {
		return F.NIL;
	}
	return evaluated;
}
 
Example #11
Source File: LanguageDetector.java    From jstarcraft-nlp with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the score of a set of tuples against a trie.
 * <p>
 * For each tuple, the trie is queried with the tuple key. When the trie has no value,
 * {@code DEFAULT_DIFFERENCE} is added; otherwise the absolute value of
 * (tuple value - trie value - 1) is added.
 * </p>
 *
 * @param tuples the tuples mapped to their int values
 * @param trie the trie providing the reference value for each tuple key
 * @return the sum of the per-tuple differences
 */
private double getScore(Object2IntMap<CharSequence> tuples, ITrie<Integer> trie) {
    double total = 0D;
    for (Object2IntMap.Entry<CharSequence> tuple : tuples.object2IntEntrySet()) {
        final Integer reference = trie.get(tuple.getKey().toString());
        final int distance;
        if (reference != null) {
            distance = Math.abs(tuple.getIntValue() - reference - 1);
        } else {
            distance = DEFAULT_DIFFERENCE;
        }
        total += distance;
    }
    return total;
}
 
Example #12
Source File: Utils.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Calculates a vector of attributes from a list of tokens.
 * <p>
 * Each attribute name is {@code prefix + token}. With frequency weights the value is
 * the number of occurrences of the token; with boolean weights it is always 1.
 * </p>
 * 
 * @param tokens the input tokens 
 * @param prefix the prefix of each vector attribute
 * @param freqWeights true for considering term-frequency weights (booleans weights are used otherwise)
 * @return an Object2IntMap object mapping the attributes to their values
 */		
public static Object2IntMap<String> calculateTermFreq(List<String> tokens, String prefix, boolean freqWeights) {
	Object2IntMap<String> termFreq = new Object2IntOpenHashMap<String>();

	for (String token : tokens) {
		// The map is keyed by the prefixed name, so every lookup and update must use it.
		// (Testing the bare token could wrongly skip a token that happens to equal an
		// already stored prefixed attribute.)
		final String attribute = prefix + token;
		if (freqWeights) {
			// frequency weights: count every occurrence (a missing key reads as 0
			// on this freshly created map)
			termFreq.put(attribute, termFreq.getInt(attribute) + 1);
		} else {
			// boolean weights: presence only, repeated tokens keep the value 1
			termFreq.put(attribute, 1);
		}
	}

	return termFreq;
}
 
Example #13
Source File: TableReader.java    From fastjgame with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the row that holds the property (column) names.
 *
 * @param fileName the file name, used to produce a more detailed error message
 * @param rowIndex the row index
 * @param row      the row content
 * @return the column-name row
 * @throws IllegalArgumentException if the same (trimmed) column name appears twice
 */
private ColNameRow readColNameRow(String fileName, int rowIndex, T row) {
    final int columnCount = getTotalColNum(row);
    // A linked hash map is used so that the read order is preserved.
    final Object2IntMap<String> indexByName = new Object2IntLinkedOpenHashMap<>(columnCount + 1);
    for (int col = 0; col < columnCount; col++) {
        final String rawName = getNullableCell(row, col);
        // Blank cells in the name row are skipped; surrounding whitespace is padding.
        final String name = (rawName == null) ? "" : rawName.trim();
        if (name.isEmpty()) {
            continue;
        }
        // Column names must be unique.
        if (indexByName.containsKey(name)) {
            throw new IllegalArgumentException("file " + fileName
                    + " propertyNameRow has duplicate column " + name);
        }
        indexByName.put(name, col);
    }
    return new ColNameRow(rowIndex, indexByName);
}
 
Example #14
Source File: ASTAssociation.java    From symja_android_library with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Removes the element at the given location and keeps the key-to-index map in sync.
 * <p>
 * The map stores signed indices; the magnitude is the position. Entries pointing past
 * the removed location are moved one step towards zero (keeping their sign), and the
 * entry pointing at the removed location is dropped.
 * </p>
 *
 * @param location the position to remove.
 * @return the value returned by {@code super.remove(location)}.
 */
@Override
public IExpr remove(int location) {
	normalCache = null;
	IExpr result = super.remove(location);
	// Use an explicit iterator so the matching entry is removed through the iterator;
	// removing from the map directly while iterating its entry set is not safe.
	java.util.Iterator<Object2IntMap.Entry<IExpr>> entries = map.object2IntEntrySet().iterator();
	while (entries.hasNext()) {
		Object2IntMap.Entry<IExpr> element = entries.next();
		int value = element.getIntValue();
		int indx = value < 0 ? -value : value;
		if (indx > location) {
			// shift towards zero, preserving the sign
			element.setValue(value > 0 ? value - 1 : value + 1);
		} else if (indx == location) {
			entries.remove();
		}
	}
	return result;
}
 
Example #15
Source File: ForwardBackwardAlgorithm.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Composes an object-to-index map given an object list.
 * Each element is mapped to its position in the list; if an element occurs more than
 * once, the last position wins.
 * @param list the list to index.
 * @param <E> the element type.
 * @return never {@code null}.
 */
private <E> Object2IntMap<E> composeIndexMap(final List<E> list) {
    final int size = list.size();
    final Object2IntMap<E> indexByElement = new Object2IntOpenHashMap<>(list.size());
    for (int position = 0; position < size; position++) {
        indexByElement.put(list.get(position), position);
    }
    return indexByElement;
}
 
Example #16
Source File: ASTAssociation.java    From symja_android_library with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the string form of every key in the map, in the map's entry iteration order.
 *
 * @return a new list with one {@code toString()} value per key.
 */
@Override
public ArrayList<String> keyNames() {
	final ArrayList<String> names = new ArrayList<String>();
	for (Object2IntMap.Entry<IExpr> entry : map.object2IntEntrySet()) {
		final IExpr key = entry.getKey();
		names.add(key.toString());
	}
	return names;
}
 
Example #17
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Turns a label-to-index map into an array where each label sits at its index.
 * The array has as many slots as the map has entries.
 *
 * @param aTagsetCollector the labels mapped to their numeric IDs.
 * @return the labels ordered by ID.
 */
private String[] compileTagset(Object2IntMap<String> aTagsetCollector)
{
    final String[] labelsById = new String[aTagsetCollector.size()];
    aTagsetCollector.object2IntEntrySet()
            .forEach(label -> labelsById[label.getIntValue()] = label.getKey());
    return labelsById;
}
 
Example #18
Source File: ASTAssociation.java    From symja_android_library with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a plain AST with the given head from this association.
 * For every map entry the magnitude of the stored int is the position; a negative
 * value produces a {@code RuleDelayed(key, value)}, any other a {@code Rule(key, value)}.
 *
 * @param symbol the head of the resulting AST.
 * @return the AST holding one rule per map entry.
 */
protected IAST normal(IBuiltInSymbol symbol) {
	final IASTMutable rules = F.ast(symbol, argSize(), true);

	for (Object2IntMap.Entry<IExpr> entry : map.object2IntEntrySet()) {
		final int signedIndex = entry.getIntValue();
		final boolean delayed = signedIndex < 0;
		final int position = delayed ? -signedIndex : signedIndex;
		if (delayed) {
			rules.set(position, F.RuleDelayed(entry.getKey(), get(position)));
		} else {
			rules.set(position, F.Rule(entry.getKey(), get(position)));
		}
	}
	return rules;
}
 
Example #19
Source File: DL4JSequenceRecommender.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a network and fits it on the training data, batch by batch, for the
 * configured number of epochs. Per epoch at most {@code getTrainingSetSizeLimit()}
 * samples are used.
 *
 * @param aTrainingData the samples to train on.
 * @param aTagset the label-to-index map passed to the vectorizer.
 * @return the fitted network.
 * @throws IOException declared by the signature; presumably raised while vectorizing.
 */
private MultiLayerNetwork train(List<Sample> aTrainingData, Object2IntMap<String> aTagset)
    throws IOException
{
    // Configure the neural network
    MultiLayerNetwork model = createConfiguredNetwork(traits, wordVectors.dimensions());

    final int limit = traits.getTrainingSetSizeLimit();
    final int batchSize = traits.getBatchSize();

    // Vectorizing all sentences up front would consume huge amounts of memory, so each
    // sentence is vectorized and handed to the model within its own small batch.
    for (int epoch = 0; epoch < traits.getnEpochs(); epoch++) {
        int processed = 0;
        Iterator<Sample> remaining = aTrainingData.iterator();
        while (remaining.hasNext()) {
            List<DataSet> batch = new ArrayList<>();
            while (remaining.hasNext() && batch.size() < batchSize && processed < limit) {
                Sample next = remaining.next();
                batch.add(vectorize(asList(next), aTagset, true));
                processed++;
            }

            model.fit(new ListDataSetIterator<DataSet>(batch, batch.size()));
            log.trace("Epoch {}: processed {} of {} sentences", epoch, processed,
                    aTrainingData.size());

            // Limit reached: stop this epoch and move on to the next one.
            if (processed >= limit) {
                break;
            }
        }
    }

    return model;
}
 
Example #20
Source File: TitlesToWIDMap.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the titles source file into a map from cleaned page title to page id.
 * Only rows in the article namespace are kept. The returned map answers -1 for
 * unknown titles.
 *
 * @return the title-to-id map.
 * @throws IOException if reading or parsing the source file fails.
 */
@Override
protected Object2IntMap<String> parseSet() throws IOException
{
	final Object2IntOpenHashMap<String> map = new Object2IntOpenHashMap<String>(3000000);
	File input = WikipediaFiles.TITLES.getSourceFile(lang);
	
	SQLWikiParser parser = new SQLWikiParser(log) {
		@Override
		public boolean compute(ArrayList<String> values) throws IOException {
			if (values.get(SQLWikiParser.PAGE_NS).equals(SQLWikiParser.NS_ARTICLE_STRING)){
				
				String title = cleanPageName(values.get(SQLWikiParser.PAGE_TITLE));
				map.put(title, Integer.parseInt(values.get(SQLWikiParser.PAGE_ID)));
				return true;
			} else return false;
		}
		
	};
	InputStreamReader reader = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"));
	try {
		parser.compute(reader);
	} finally {
		// Close the file even when parsing fails, so the handle is not leaked.
		reader.close();
	}
	
	map.defaultReturnValue(-1);
	map.trim();
	
	return map;
}
 
Example #21
Source File: IgnoreWIDs.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the set of page ids whose title matches either the date pattern or the
 * ignore pattern of the current language. The date pattern is tested first.
 *
 * @return the ids of the matching pages.
 * @throws IOException declared by the signature; presumably raised while loading the titles.
 */
@Override
protected IntSet parseSet() throws IOException
{
	log.info("Loading data...");
	Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
	IntOpenHashSet ignored = new IntOpenHashSet(titles.size());
	
	Pattern datePattern = WikiPatterns.getPattern(lang, Type.PAGE_DATE);
	Pattern otherPattern = WikiPatterns.getPattern(lang, Type.PAGE_IGNORE);
	
	PLogger progress = new PLogger(log,"titles","dates","others").setEnd(0, titles.size()).start("Parsing ignore-pages...");
	for (String title : titles.keySet())
	{
		progress.update(0);
		// Counter 1 tracks date pages, counter 2 the other ignored pages.
		if (p_matches(datePattern, title)) {
			progress.update(1);
		}
		else if (p_matches(otherPattern, title)) {
			progress.update(2);
		}
		else {
			continue;
		}
		ignored.add(titles.get(title));
	}
	progress.stop();
	
	ignored.trim();
	return ignored;
}

/** Tells whether the pattern is found anywhere in the text. */
private static boolean p_matches(Pattern pattern, String text)
{
	return pattern.matcher(text).find();
}
 
Example #22
Source File: RedirectMap.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the redirects source file into a map from the redirecting page id to the id
 * of the target page. Only redirects in the article namespace are considered; the
 * target id is looked up by cleaned title, and redirects whose lookup yields a negative
 * id are counted through {@code updateItem(0)} instead of being stored.
 * The returned map answers -1 for unknown ids.
 *
 * @return the redirect map.
 * @throws IOException if reading or parsing the source file fails.
 */
@Override
protected Int2IntMap parseSet() throws IOException
{
	final Object2IntMap<String> titles = new TitlesToWIDMap(lang).getDataset();
	final Int2IntOpenHashMap map = new Int2IntOpenHashMap(3000000);
	SQLWikiParser parser = new SQLWikiParser(log, "Titles NF") {
		@Override
		public boolean compute(ArrayList<String> values) throws IOException
		{
			int ns = Integer.parseInt(values.get(SQLWikiParser.REDIRECT_NS));
			if (ns == SQLWikiParser.NS_ARTICLE)
			{
				int idFrom = Integer.parseInt(values.get(SQLWikiParser.REDIRECT_ID_FROM));
				int idTo = titles.getInt(cleanPageName(values.get(SQLWikiParser.REDIRECT_TITLE_TO)));
				if (idTo >= 0)
					map.put(idFrom, idTo);
				else this.updateItem(0);
				
				return true;
			} else return false;
		}
	};

	File input = WikipediaFiles.REDIRECTS.getSourceFile(lang);
	InputStreamReader in = new InputStreamReader(new FileInputStream(input), Charset.forName("UTF-8"));
	try {
		parser.compute(in);
	} finally {
		// Close the file even when parsing fails, so the handle is not leaked.
		in.close();
	}
	
	map.defaultReturnValue(-1);
	map.trim();
	
	return map;

}
 
Example #23
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds the evidence-to-position index of a sample, stores it in
 * {@code evidenceIndexBySampleIndex} and returns it. Lookups of unknown evidence on the
 * new index answer {@code MISSING_INDEX}.
 *
 * @param sampleIndex the index of the sample to build the cache for.
 * @return the freshly built index.
 */
private Object2IntMap<EVIDENCE> fillEvidenceToIndexCache(int sampleIndex) {
    final List<EVIDENCE> evidence = evidenceBySampleIndex.get(sampleIndex);
    final int total = evidence.size();

    final Object2IntMap<EVIDENCE> positions = new Object2IntOpenHashMap<>(total);
    positions.defaultReturnValue(MISSING_INDEX);

    int position = 0;
    while (position < total) {
        positions.put(evidence.get(position), position);
        position++;
    }

    evidenceIndexBySampleIndex.set(sampleIndex, positions);
    return positions;
}
 
Example #24
Source File: BestAnchors.java    From tagme with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the best anchor for a page: the most frequent anchor (with at least
 * {@code MIN_ANCHORS} occurrences) that contains at least one term not present in
 * the page title.
 *
 * @param wid the page id used to look up the title in the article index.
 * @param anchors the candidate anchors mapped to their counts.
 * @return the selected anchor, or {@code null} when the page is not found or no
 *         anchor qualifies.
 * @throws IOException if the index lookup fails.
 */
String findBest(int wid, final Object2IntMap<String> anchors) throws IOException
{
	Query q = new TermQuery(new Term(WikipediaIndexer.FIELD_WID, ""+wid));
	TopDocs td = articles.search(q, 1);
	if (td.totalHits == 0) return null;//throw new IOException("Unable to find title for WID:"+wid);
	String title = articles.doc(td.scoreDocs[0].doc).get(WikipediaIndexer.FIELD_TITLE);
	title = title.replaceAll("\\&quot;", "\"");

	Set<String> titleTerms = terms(title).keySet();

	List<String> bests = new ArrayList<String>(anchors.size());
	bests.addAll(anchors.keySet());
	// Sort by descending count. The counts are compared explicitly instead of
	// subtracted, because a subtraction can overflow and yield a wrong sign.
	Collections.sort(bests, new Comparator<String>() {
		@Override
		public int compare(String o1, String o2) {
			final int count1 = anchors.getInt(o1);
			final int count2 = anchors.getInt(o2);
			return count2 < count1 ? -1 : (count2 == count1 ? 0 : 1);
		}
	});


	for (String a : bests)
	{
		if (anchors.getInt(a)< MIN_ANCHORS) continue;
		Set<String> anchorTerms = terms(a).keySet();
		// the anchor qualifies as soon as it brings a term the title does not have
		for(String aw : anchorTerms)
			if (!titleTerms.contains(aw))
				return a;
	}
	return null;
}
 
Example #25
Source File: CounterSet.java    From WikipediaEntities with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Merge the second counter set into the first one: every count of {@code second} is
 * added to the count of the same key in {@code first}. {@code second} is only read.
 *
 * @param first Set of counters that receives the counts.
 * @param second Other set of counters, whose counts are added.
 */
public static <O> void update(Object2IntOpenHashMap<O> first, Object2IntOpenHashMap<O> second) {
  for(Iterator<Object2IntMap.Entry<O>> iter = second.object2IntEntrySet().fastIterator(); iter.hasNext();) {
    // Note: the fast iterator may recycle the entry object, so read it right away.
    Object2IntMap.Entry<O> entry = iter.next();
    // Accumulate into the destination (adding back into the iterated map would leave
    // the destination untouched).
    first.addTo(entry.getKey(), entry.getIntValue());
  }
}
 
Example #26
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Removes the given evidence from a sample. Each evidence is resolved to its index
 * through the sample's evidence index; the sorted, de-duplicated indices are then
 * passed to {@code removeEvidenceByIndex}.
 *
 * @param sampleIndex the index of the sample to remove from.
 * @param evidences the evidence to remove.
 * @throws IllegalArgumentException if any evidence resolves to {@code MISSING_INDEX}.
 */
private void removeEvidence(final int sampleIndex, final Collection<EVIDENCE> evidences) {
    final Object2IntMap<EVIDENCE> indexByEvidence = evidenceIndexBySampleIndex(sampleIndex);
    final int[] positions = evidences.stream()
            .mapToInt(evidence -> {
                final int position = indexByEvidence.getInt(evidence);
                if (position != MISSING_INDEX) {
                    return position;
                }
                throw new IllegalArgumentException("evidence provided is not in sample");
            })
            .sorted()
            .distinct()
            .toArray();
    removeEvidenceByIndex(sampleIndex, positions);
}
 
Example #27
Source File: CategoricalColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a two-column table ("Category", "Count") with the number of occurrences of
 * each distinct string value of this column. Missing cells are not counted per value;
 * when there are any, one extra row with a missing category and their total is appended.
 *
 * @return the table of counts, named after this column.
 */
default Table countByCategory() {

    final Table result = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    // Tally the non-missing values; an unseen value reads as 0 on this fresh map.
    final Object2IntMap<String> tally = new Object2IntOpenHashMap<>();
    for (int row = 0; row < size(); row++) {
      if (isMissing(row)) {
        continue;
      }
      final String value = getString(row);
      final int seenSoFar = tally.containsKey(value) ? tally.getInt(value) : 0;
      tally.put(value, seenSoFar + 1);
    }

    for (Map.Entry<String, Integer> counted : tally.object2IntEntrySet()) {
      categories.appendCell(counted.getKey());
      counts.append(counted.getValue());
    }
    if (countMissing() > 0) {
      categories.appendMissing();
      counts.append(countMissing());
    }
    result.addColumns(categories);
    result.addColumns(counts);
    return result;
}
 
Example #28
Source File: TweetCentroid.java    From AffectiveTweets with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Adds a new document to the word representation: the document counter is
 * incremented and every frequency of the document vector is added to the value
 * currently stored for the same word in the word space.
 * @param docVector a  document vector
 */
public void addDoc(Object2IntMap<String> docVector){
	this.numDoc++;
	for (Object2IntMap.Entry<String> term : docVector.object2IntEntrySet()) {
		final String word = term.getKey();
		final int frequency = term.getIntValue();
		// accumulate on top of whatever the word space currently reports for the word
		this.wordSpace.put(word, frequency + this.wordSpace.getInt(word));
	}
}
 
Example #29
Source File: CategoricalColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Counts how often each distinct string value occurs in this column and returns the
 * result as a table with a "Category" and a "Count" column. If the column has missing
 * cells, a final row with a missing category and the number of missing cells is added.
 *
 * @return the table of counts, named after this column.
 */
default Table countByCategory() {

    final Table summary = new Table("Column: " + name());
    final CategoricalColumn<?> categoryColumn = (CategoricalColumn<?>) type().create("Category");
    final IntColumn countColumn = IntColumn.create("Count");

    final Object2IntMap<String> occurrences = new Object2IntOpenHashMap<>();

    for (int index = 0; index < size(); index++) {
      if (isMissing(index)) {
        continue;
      }
      final String cell = getString(index);
      if (!occurrences.containsKey(cell)) {
        // first sighting of this value
        occurrences.put(cell, 1);
      } else {
        occurrences.put(cell, occurrences.getInt(cell) + 1);
      }
    }

    for (Map.Entry<String, Integer> occurrence : occurrences.object2IntEntrySet()) {
      categoryColumn.appendCell(occurrence.getKey());
      countColumn.append(occurrence.getValue());
    }

    if (countMissing() > 0) {
      categoryColumn.appendMissing();
      countColumn.append(countMissing());
    }

    summary.addColumns(categoryColumn);
    summary.addColumns(countColumn);
    return summary;
}
 
Example #30
Source File: CounterSet.java    From WikipediaEntities with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Get a sorted list of the counted items.
 * Every counter is copied into a fresh {@code Entry} and the copies are sorted with
 * {@code Collections.sort}, i.e. by the natural ordering of {@code Entry}
 * (presumably descending by count, as the method name suggests).
 *
 * @param counters the counters to list.
 * @return List of items.
 */
public static <O> List<Entry<O>> descending(Object2IntOpenHashMap<O> counters) {
  ArrayList<Entry<O>> sorted = new ArrayList<>(counters.size());
  Iterator<Object2IntMap.Entry<O>> cursor = counters.object2IntEntrySet().fastIterator();
  while(cursor.hasNext()) {
    // The fast iterator recycles its entry object, so copy key and value out at once.
    Object2IntMap.Entry<O> shared = cursor.next();
    sorted.add(new Entry<O>(shared.getKey(), shared.getIntValue()));
  }
  Collections.sort(sorted);
  return sorted;
}