Java Code Examples for it.unimi.dsi.fastutil.objects.Object2IntMap#put()

The following examples show how to use it.unimi.dsi.fastutil.objects.Object2IntMap#put(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TableReader.java    From fastjgame with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the row that holds the column (property) names.
 *
 * @param fileName name of the file being read; only used to make the error message more specific
 * @param rowIndex index of the row
 * @param row      content of the row
 * @return the column-name row, built from the row index and a map of each non-blank,
 *         trimmed column name to its column index
 * @throws IllegalArgumentException if two columns carry the same trimmed name
 */
private ColNameRow readColNameRow(String fileName, int rowIndex, T row) {
    final int totalColNum = getTotalColNum(row);
    // A linked map keeps the names in the order in which they were read.
    final Object2IntMap<String> indexByName = new Object2IntLinkedOpenHashMap<>(totalColNum + 1);
    for (int col = 0; col < totalColNum; col++) {
        final String rawName = getNullableCell(row, col);
        // Strip the padding; a missing or blank cell does not declare a column.
        final String name = (rawName == null) ? "" : rawName.trim();
        if (name.isEmpty()) {
            continue;
        }
        // Column names must be unique within the row.
        if (indexByName.containsKey(name)) {
            throw new IllegalArgumentException("file " + fileName
                    + " propertyNameRow has duplicate column " + name);
        }
        indexByName.put(name, col);
    }
    return new ColNameRow(rowIndex, indexByName);
}
 
Example 2
Source File: Utils.java    From AffectiveTweets with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Calculates a vector of attributes from a list of tokens.
 * 
 * @param tokens the input tokens 
 * @param prefix the prefix of each vector attribute
 * @param freqWeights true for considering term-frequency weights (boolean weights are used otherwise)
 * @return an Object2IntMap object mapping the attributes (prefix + token) to their values
 */
public static Object2IntMap<String> calculateTermFreq(List<String> tokens, String prefix, boolean freqWeights) {
	Object2IntMap<String> termFreq = new Object2IntOpenHashMap<String>();

	for (String token : tokens) {
		// The map is keyed by the prefixed token, so every lookup has to use that same key.
		// Checking the bare token could wrongly skip an attribute whenever an unprefixed
		// token happened to equal an already stored prefixed key.
		String attribute = prefix + token;
		if (freqWeights) {
			// term-frequency weights: bump the counter (getInt returns the map's
			// default return value, 0 for this freshly created map, for unseen keys)
			termFreq.put(attribute, termFreq.getInt(attribute) + 1);
		} else if (!termFreq.containsKey(attribute)) {
			// boolean weights: only record that the attribute is present
			termFreq.put(attribute, 1);
		}
	}

	return termFreq;
}
 
Example 3
Source File: ReadCountCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Rearrange the targets so that they are in a particular order.
 *
 * @param targetsInOrder the targets of the resulting collection, in the requested order.
 * @return a new collection.
 * @throws IllegalArgumentException if any of the following is true:
 * <ul>
 *     <li>{@code targetsInOrder} is {@code null},</li>
 *     <li>is empty,</li>
 *     <li>it contains {@code null},</li>
 *     <li>contains any target not present in this collection.</li>
 * </ul>
 */
public ReadCountCollection arrangeTargets(final List<Target> targetsInOrder) {
    Utils.nonNull(targetsInOrder);
    Utils.nonEmpty(targetsInOrder, "the input targets list cannot be empty");
    final RealMatrix counts = new Array2DRowRealMatrix(targetsInOrder.size(), columnNames.size());
    // Look-up from each target of this collection to its current row index.
    final Object2IntMap<Target> targetToIndex = new Object2IntOpenHashMap<>(targets.size());
    for (int i = 0; i < targets.size(); i++) {
        targetToIndex.put(targets.get(i), i);
    }
    for (int i = 0; i < targetsInOrder.size(); i++) {
        final Target target = targetsInOrder.get(i);
        // Reject null elements explicitly: the "not present" message below dereferences
        // the target, so a null would otherwise fail while building that message rather
        // than through the documented argument validation.
        Utils.validateArg(target != null, () -> "the input targets list cannot contain null");
        Utils.validateArg(targetToIndex.containsKey(target), () -> String.format("target '%s' is not present in the collection", target.getName()));
        // Copy the counts of the target from its current row to its new position.
        counts.setRow(i, this.counts.getRow(targetToIndex.getInt(target)));
    }
    return new ReadCountCollection(new ArrayList<>(targetsInOrder), columnNames, counts, false);
}
 
Example 4
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Replaces evidence elements in place, sample by sample.
 * <p>
 * Every evidence that has a non-null entry in {@code evidenceReplacements} is swapped for its
 * replacement at the same position. When an evidence-to-index map exists for the sample, it is
 * updated so that the replacement maps to that position and the old evidence is removed.
 *
 * @param evidenceReplacements maps an existing evidence to the evidence that takes its place;
 *                             evidences without a (non-null) mapping are left untouched.
 */
public void changeEvidence(final Map<EVIDENCE, EVIDENCE> evidenceReplacements) {
    for (int sample = 0, total = samples.numberOfSamples(); sample < total; sample++) {
        final List<EVIDENCE> evidences = evidenceBySampleIndex.get(sample);
        // May be null, in which case there is no index to keep in sync for this sample.
        final Object2IntMap<EVIDENCE> positions = evidenceIndexBySampleIndex.get(sample);
        for (int pos = 0, size = evidences.size(); pos < size; pos++) {
            final EVIDENCE current = evidences.get(pos);
            final EVIDENCE substitute = evidenceReplacements.get(current);
            if (substitute != null) {
                evidences.set(pos, substitute);
                if (positions != null) {
                    positions.remove(current);
                    positions.put(substitute, pos);
                }
            }
        }
    }
}
 
Example 5
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Appends to a sample those evidences that it does not contain yet and refreshes the
 * evidence count recorded for that sample.
 *
 * @param newSampleEvidence the candidate evidences to append.
 * @param sampleIndex index of the sample that receives the evidences.
 */
private void appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) {
    final List<EVIDENCE> known = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> positions = evidenceIndexBySampleIndex(sampleIndex);

    for (final EVIDENCE candidate : newSampleEvidence) {
        // Optimistically register the candidate at the next free position; put hands back
        // the value that was stored before, which tells us whether it was already known.
        final int displaced = positions.put(candidate, known.size());
        if (displaced != MISSING_INDEX) {
            // Already present: restore the position we have just overwritten.
            positions.put(candidate, displaced);
            continue;
        }
        known.add(candidate);
    }

    numberOfEvidences[sampleIndex] = known.size();
}
 
Example 6
Source File: SmartDictionarySerializer.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a map from each word to its position in the given array. When a word occurs more
 * than once, the position of its last occurrence is the one kept.
 *
 * @param words the words, in dictionary order
 * @return a new map from word to array index
 * @throws IOException declared by the signature; nothing in this body throws it
 */
private Object2IntMap<String> indexToDictionary(final String[] words) throws IOException {
    final Object2IntMap<String> positionByWord = new Object2IntOpenHashMap<>();
    int position = 0;
    for (final String word : words) {
        positionByWord.put(word, position++);
    }
    return positionByWord;
}
 
Example 7
Source File: CategoricalColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a two-column table ("Category", "Count") holding, for every distinct non-missing
 * value of this column, the number of rows in which it occurs. When the column has missing
 * values, one extra row with a missing category and the number of missing values is appended.
 *
 * @return a new table named {@code "Column: " + name()} with the category and count columns
 */
default Table countByCategory() {

    final Table t = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    final Object2IntMap<String> valueToCount = new Object2IntOpenHashMap<>();

    for (int i = 0; i < size(); i++) {
        if (!isMissing(i)) {
            final String next = getString(i);
            // getInt returns the map's default return value (0 for this freshly created
            // map) for unseen keys, so no separate containsKey branch is required.
            valueToCount.put(next, valueToCount.getInt(next) + 1);
        }
    }
    // Iterate the primitive entry view so that the counts are never boxed.
    for (Object2IntMap.Entry<String> entry : valueToCount.object2IntEntrySet()) {
        categories.appendCell(entry.getKey());
        counts.append(entry.getIntValue());
    }
    // Evaluate the missing count once instead of once for the test and once for the append.
    final int missing = countMissing();
    if (missing > 0) {
        categories.appendMissing();
        counts.append(missing);
    }
    t.addColumns(categories);
    t.addColumns(counts);
    return t;
}
 
Example 8
Source File: CategoricalColumn.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Counts how often each distinct non-missing value occurs in this column and reports the
 * result as a table with a "Category" column and a "Count" column. If the column contains
 * missing values, a final row with a missing category and their number is added.
 *
 * @return a new table named {@code "Column: " + name()} with one row per category
 */
default Table countByCategory() {

    final Table t = new Table("Column: " + name());
    final CategoricalColumn<?> categories = (CategoricalColumn<?>) type().create("Category");
    final IntColumn counts = IntColumn.create("Count");

    final Object2IntMap<String> valueToCount = new Object2IntOpenHashMap<>();

    for (int i = 0; i < size(); i++) {
        if (isMissing(i)) {
            continue;
        }
        final String next = getString(i);
        // An unseen key yields the map's default return value (0 for this freshly created
        // map), which makes a single get-and-put sufficient for both new and known values.
        valueToCount.put(next, valueToCount.getInt(next) + 1);
    }
    // The type-specific entry set exposes the count as an int, avoiding Integer boxing.
    for (Object2IntMap.Entry<String> entry : valueToCount.object2IntEntrySet()) {
        categories.appendCell(entry.getKey());
        counts.append(entry.getIntValue());
    }
    // Compute the number of missing values a single time and reuse it below.
    final int missing = countMissing();
    if (missing > 0) {
        categories.appendMissing();
        counts.append(missing);
    }
    t.addColumns(categories);
    t.addColumns(counts);
    return t;
}
 
Example 9
Source File: AlleleLikelihoods.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds the evidence-to-position map of a sample and stores it in the per-sample cache.
 * Look-ups of evidences that are not in the map return {@code MISSING_INDEX}.
 *
 * @param sampleIndex index of the sample whose map is built.
 * @return the newly built map, which is also stored in {@code evidenceIndexBySampleIndex}.
 */
private Object2IntMap<EVIDENCE> fillEvidenceToIndexCache(int sampleIndex) {
    final List<EVIDENCE> evidences = evidenceBySampleIndex.get(sampleIndex);
    final Object2IntMap<EVIDENCE> positions = new Object2IntOpenHashMap<>(evidences.size());
    positions.defaultReturnValue(MISSING_INDEX);
    int position = 0;
    for (final EVIDENCE evidence : evidences) {
        positions.put(evidence, position++);
    }
    evidenceIndexBySampleIndex.set(sampleIndex, positions);
    return positions;
}
 
Example 10
Source File: SymmetricImmutablePairTest.java    From liblevenshtein-java with MIT License 5 votes vote down vote up
/**
 * Verifies that two equivalent pairs compare as equal, are equal, share a hash code, and
 * address the same entry when used as keys of a hash-based and of a tree-based map.
 */
@Test(dataProvider = "equivalentPairs")
public void testEquivalentPairs(
    final SymmetricImmutablePair<String> lhs,
    final SymmetricImmutablePair<String> rhs) {

  // Natural ordering: each pair compares as equal to itself and to the other one.
  assertThat(lhs).isEqualByComparingTo(lhs);
  assertThat(rhs).isEqualByComparingTo(rhs);
  assertThat(lhs).isEqualByComparingTo(rhs);
  assertThat(rhs).isEqualByComparingTo(lhs);

  // equals(): same expectations as for the ordering.
  assertThat(lhs).isEqualTo(lhs);
  assertThat(rhs).isEqualTo(rhs);
  assertThat(lhs).isEqualTo(rhs);
  assertThat(rhs).isEqualTo(lhs);

  assertThat(lhs.hashCode()).isEqualTo(rhs.hashCode());

  // Hash-based map first, then the comparison-based tree map.
  assertInterchangeableAsKeys(
      new Object2IntOpenHashMap<SymmetricImmutablePair<String>>(2), lhs, rhs);
  assertInterchangeableAsKeys(
      new Object2IntRBTreeMap<SymmetricImmutablePair<String>>(), lhs, rhs);
}

/**
 * Asserts that a value stored under either pair is visible under both of them.
 */
private static void assertInterchangeableAsKeys(
    final Object2IntMap<SymmetricImmutablePair<String>> map,
    final SymmetricImmutablePair<String> lhs,
    final SymmetricImmutablePair<String> rhs) {

  map.put(lhs, 1);
  assertThat(map).containsEntry(lhs, 1);
  assertThat(map).containsEntry(rhs, 1);

  map.put(rhs, 2);
  assertThat(map).containsEntry(rhs, 2);
  assertThat(map).containsEntry(lhs, 2);
}
 
Example 11
Source File: NoDictionarySingleColumnGroupKeyGenerator.java    From incubator-pinot with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the group id stored for the given value, assigning the next free id when the
 * look-up yields {@code INVALID_ID} and the number of groups is still below the global upper
 * bound. When the bound has been reached, {@code INVALID_ID} is returned and nothing is stored.
 * NOTE(review): this presumably relies on the map's default return value being
 * {@code INVALID_ID}; confirm where {@code _groupKeyMap} is created.
 */
@SuppressWarnings("unchecked")
private int getKeyForValue(String value) {
  Object2IntMap<String> groupIds = (Object2IntMap<String>) _groupKeyMap;
  int id = groupIds.getInt(value);
  if (id == INVALID_ID && _numGroups < _globalGroupIdUpperBound) {
    id = _numGroups++;
    groupIds.put(value, id);
  }
  return id;
}
 
Example 12
Source File: HivePageSink.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the page sink: stores its collaborators, builds the page partitioner, records the
 * input indexes of the partition and data columns and, when a bucket property is present,
 * derives the bucket column indexes and the bucket function.
 *
 * @throws NullPointerException if any of the reference arguments is null
 */
public HivePageSink(
        HiveWriterFactory writerFactory,
        List<HiveColumnHandle> inputColumns,
        Optional<HiveBucketProperty> bucketProperty,
        PageIndexerFactory pageIndexerFactory,
        HdfsEnvironment hdfsEnvironment,
        int maxOpenWriters,
        ListeningExecutorService writeVerificationExecutor,
        JsonCodec<PartitionUpdate> partitionUpdateCodec,
        ConnectorSession session)
{
    this.writerFactory = requireNonNull(writerFactory, "writerFactory is null");

    requireNonNull(inputColumns, "inputColumns is null");
    requireNonNull(pageIndexerFactory, "pageIndexerFactory is null");

    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.maxOpenWriters = maxOpenWriters;
    this.writeVerificationExecutor = requireNonNull(writeVerificationExecutor, "writeVerificationExecutor is null");
    this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");

    requireNonNull(bucketProperty, "bucketProperty is null");
    this.pagePartitioner = new HiveWriterPagePartitioner(
            inputColumns,
            bucketProperty.isPresent(),
            pageIndexerFactory);

    // Split the input columns into partition keys and data columns, remembering the input
    // index of each; for data columns also remember index and Hive type by column name so
    // that the bucketing columns can be resolved below.
    // NOTE(review): an earlier comment here mentioned a separately passed sample weight
    // column; no such column is handled in this constructor.
    ImmutableList.Builder<Integer> partitionIndexes = ImmutableList.builder();
    ImmutableList.Builder<Integer> dataIndexes = ImmutableList.builder();
    Object2IntMap<String> dataColumnNameToIdMap = new Object2IntOpenHashMap<>();
    Map<String, HiveType> dataColumnNameToTypeMap = new HashMap<>();
    for (int inputIndex = 0; inputIndex < inputColumns.size(); inputIndex++) {
        HiveColumnHandle column = inputColumns.get(inputIndex);
        if (!column.isPartitionKey()) {
            dataIndexes.add(inputIndex);
            dataColumnNameToIdMap.put(column.getName(), inputIndex);
            dataColumnNameToTypeMap.put(column.getName(), column.getHiveType());
        }
        else {
            partitionIndexes.add(inputIndex);
        }
    }
    this.partitionColumnsInputIndex = Ints.toArray(partitionIndexes.build());
    this.dataColumnInputIndex = Ints.toArray(dataIndexes.build());

    if (!bucketProperty.isPresent()) {
        bucketColumns = null;
        bucketFunction = null;
    }
    else {
        HiveBucketProperty bucketing = bucketProperty.get();
        BucketingVersion bucketingVersion = bucketing.getBucketingVersion();
        int bucketCount = bucketing.getBucketCount();
        // NOTE(review): ::get is the boxed, Map-style look-up; presumably a bucketed-by name
        // that is not a data column fails here while unboxing - confirm before replacing it
        // with getInt, which would return the map's default value instead.
        bucketColumns = bucketing.getBucketedBy().stream()
                .mapToInt(dataColumnNameToIdMap::get)
                .toArray();
        List<HiveType> bucketColumnTypes = bucketing.getBucketedBy().stream()
                .map(dataColumnNameToTypeMap::get)
                .collect(toList());
        bucketFunction = new HiveBucketFunction(bucketingVersion, bucketCount, bucketColumnTypes);
    }

    this.session = requireNonNull(session, "session is null");
}
 
Example 13
Source File: LanguageDetector.java    From jstarcraft-nlp with Apache License 2.0 4 votes vote down vote up
/**
 * Detects the languages a text may be written in.
 * 
 * @param text    the text to analyse; it is cut to {@code maximum} characters when longer
 * @param options per-language switches: keys mapped to {@code true} are collected into a
 *                whitelist, keys mapped to {@code false} into a blacklist, and both sets are
 *                handed to {@code checkLanguage} for every candidate
 * @return the detected languages with their scores; empty when the text is shorter than
 *         {@code minimum}, when no script pattern matches, or when every candidate is
 *         rejected by {@code checkLanguage}
 */
public SortedSet<DetectionLanguage> detectLanguages(String text, Object2BooleanMap<String> options) {
    SortedSet<DetectionLanguage> locales = new TreeSet<>();

    // Minimum length limit
    final int size = text.length();
    if (size < minimum) {
        return locales;
    }
    // Maximum length limit
    if (size > maximum) {
        text = text.substring(0, maximum);
    }

    // Whitelist and blacklist. The shared immutable empty set is safe when there are no
    // options, because the loop below then never adds anything.
    final boolean noOptions = options.isEmpty();
    Set<String> whitelist = noOptions ? Collections.<String>emptySet() : new HashSet<String>();
    Set<String> blacklist = noOptions ? Collections.<String>emptySet() : new HashSet<String>();
    for (Object2BooleanMap.Entry<String> option : options.object2BooleanEntrySet()) {
        if (option.getBooleanValue()) {
            whitelist.add(option.getKey());
        } else {
            blacklist.add(option.getKey());
        }
    }

    /*
     * Find the script whose pattern matches the text most often.
     */
    int count = -1;
    String script = null;
    for (DetectionPattern regulation : patterns.values()) {
        Pattern pattern = regulation.getPattern();
        Matcher matcher = pattern.matcher(text);
        int match = 0;
        while (matcher.find()) {
            match++;
        }
        if (match > count) {
            count = match;
            script = regulation.getName();
        }
    }
    if (script == null || count <= 0) {
        return locales;
    }

    Set<DetectionTrie> dictionaries = tires.get(script);
    if (dictionaries == null) {
        /*
         * No dictionaries are registered for the most-used script: the script name itself
         * is used as the language tag, with a score of 1, provided that it is accepted by
         * checkLanguage.
         */
        if (!checkLanguage(script, whitelist, blacklist)) {
            return locales;
        }
        locales.add(new DetectionLanguage(Locale.forLanguageTag(script), 1D));
        return locales;
    }

    /*
     * Score the text against every dictionary of the script, then normalize the scores.
     */
    // A space is added on both sides for the N-gram processing. Lower-casing uses
    // Locale.ROOT so that the trigrams do not depend on the JVM default locale (under a
    // Turkish default locale, for instance, 'I' would not become 'i').
    text = StringUtility.SPACE + REPLACE.matcher(text).replaceAll(StringUtility.SPACE).toLowerCase(Locale.ROOT) + StringUtility.SPACE;
    CharacterNgram ngram = new CharacterNgram(3, text);
    Object2IntMap<CharSequence> tuples = new Object2IntOpenHashMap<>();
    for (CharSequence trigram : ngram) {
        // getInt returns the map's default return value (0 for this freshly created map)
        // the first time a trigram is seen.
        tuples.put(trigram, tuples.getInt(trigram) + 1);
    }
    for (DetectionTrie dictionary : dictionaries) {
        String language = dictionary.getName();
        if (checkLanguage(language, whitelist, blacklist)) {
            double score = getScore(tuples, dictionary.getTrie());
            locales.add(new DetectionLanguage(Locale.forLanguageTag(language), score));
        }
    }
    if (!locales.isEmpty()) {
        normalizeScores(text, locales);
    }
    return locales;
}
 
Example 14
Source File: OpenNlpNerRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Converts the target annotations covered by a sentence into token-based spans.
 * <p>
 * Annotations that do not begin and end at token boundaries are skipped with a warning, and
 * so are annotations that begin before the end of a previously accepted span. Annotations
 * with a blank label produce no span.
 *
 * @param aCas      the CAS from which the annotation type is resolved
 * @param aSentence the sentence whose covered annotations are converted
 * @param aTokens   the tokens of the sentence, in order; their iteration position becomes
 *                  the token index
 * @return the spans, each with a begin token index, an exclusive end token index and the
 *         label of the annotation
 */
private Span[] extractAnnotatedSpans(CAS aCas, AnnotationFS aSentence,
                                     Collection<AnnotationFS> aTokens) {
    // Convert character offsets to token indices
    Int2ObjectMap<AnnotationFS> idxTokenOffset = new Int2ObjectOpenHashMap<>();
    Object2IntMap<AnnotationFS> idxToken = new Object2IntOpenHashMap<>();
    int idx = 0;
    for (AnnotationFS t : aTokens) {
        idxTokenOffset.put(t.getBegin(), t);
        idxTokenOffset.put(t.getEnd(), t);
        idxToken.put(t, idx);
        idx++;
    }

    // Create spans from target annotations
    Type annotationType = getType(aCas, layerName);
    Feature feature = annotationType.getFeatureByBaseName(featureName);
    List<AnnotationFS> annotations = selectCovered(annotationType, aSentence);
    List<Span> result = new ArrayList<>();

    int highestEndTokenPositionObserved = 0;
    for (AnnotationFS annotation : annotations) {
        String label = annotation.getFeatureValueAsString(feature);
        
        AnnotationFS beginToken = idxTokenOffset.get(annotation.getBegin());
        AnnotationFS endToken = idxTokenOffset.get(annotation.getEnd());
        if (beginToken == null || endToken == null) {
            LOG.warn("Skipping annotation not starting/ending at token boundaries: [{}-{}, {}]",
                    annotation.getBegin(), annotation.getEnd(), label);
            continue;
        }
        
        // Both tokens come from idxTokenOffset, whose values were all registered in
        // idxToken above, so the type-specific getInt always finds them and avoids the
        // boxing of the deprecated Map-style get.
        int begin = idxToken.getInt(beginToken);
        int end = idxToken.getInt(endToken);
        
        // If the begin offset of the current annotation is lower than the highest offset so far
        // observed, then it is overlapping with some annotation that we have seen before. 
        // Because OpenNLP NER does not support overlapping annotations, we skip it.
        if (begin < highestEndTokenPositionObserved) {
            LOG.debug("Skipping overlapping annotation: [{}-{}, {}]", begin, end + 1, label);
            continue;
        }
        
        if (isNotBlank(label)) {
            result.add(new Span(begin, end + 1, label));
            highestEndTokenPositionObserved = end + 1;
        }
    }
    return result.toArray(new Span[result.size()]);
}