Java Code Examples for com.carrotsearch.hppc.IntOpenHashSet

The following examples show how to use com.carrotsearch.hppc.IntOpenHashSet. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Adds the ids of all documents containing {@code word} to {@code documents}.
 * <p>
 * Every entry of {@code reader} is asked for the postings of the term built
 * from {@code fieldName} and {@code word}. Each returned doc id is offset by
 * the {@code docBase} of the context at the same index before it is added.
 * An {@link IOException} is logged and not rethrown; ids added before the
 * failure stay in the set.
 *
 * @param word
 *            the word to look up
 * @param documents
 *            the set that receives the offset document ids
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final Term searchTerm = new Term(fieldName, word);
    try {
        for (int idx = 0; idx < reader.length; idx++) {
            final DocsEnum postings = reader[idx].termDocsEnum(searchTerm);
            final int docBase = contexts[idx].docBase;
            if (postings == null) {
                // No postings for this term in this reader.
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                documents.add(docBase + postings.docID());
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example 2
/**
 * Builds one {@code CountedSubsets} per segmentation definition.
 * <p>
 * First registers an empty document set for every distinct word in
 * {@code wordsets}, lets {@code corpusAdapter} fill those sets, and then
 * derives the counts for {@code wordsets[i]} and pairs them with
 * {@code definitions[i]}.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is combined with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions, one per word set
 * @return an array with one entry per element of {@code definitions}
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    final ObjectObjectOpenHashMap<String, IntOpenHashSet> docsByWord = new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (!docsByWord.containsKey(word)) {
                docsByWord.put(word, new IntOpenHashSet());
            }
        }
    }

    corpusAdapter.getDocumentsWithWordsAsSet(docsByWord);

    final CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int i = 0; i < definitions.length; ++i) {
        final SegmentationDefinition definition = definitions[i];
        result[i] = new CountedSubsets(definition.segments, definition.conditions,
                createCounts(createBitSets(docsByWord, wordsets[i])));
    }
    return result;
}
 
Example 3
/**
 * Converts the given hash sets into bit sets over a common index space.
 * <p>
 * The allocated slots of {@code mergedHashSet} are numbered consecutively
 * in slot order. Bit {@code p} of the {@code j}-th result is set exactly
 * when {@code hashSets[j]} contains the key stored in the {@code p}-th
 * allocated slot.
 *
 * @param hashSets
 *            the sets to convert
 * @param mergedHashSet
 *            the set whose allocated slots define the bit positions
 * @return one bit set per element of {@code hashSets}
 */
private BitSet[] createBitSets(IntOpenHashSet hashSets[],
        IntOpenHashSet mergedHashSet) {
    final int setCount = hashSets.length;
    final BitSet[] result = new BitSet[setCount];
    for (int s = 0; s < setCount; ++s) {
        result[s] = new BitSet(mergedHashSet.size());
    }

    final int[] slots = mergedHashSet.keys;
    final boolean[] occupied = mergedHashSet.allocated;
    int bit = 0;
    for (int slot = 0; slot < slots.length; slot++) {
        if (!occupied[slot]) {
            continue;
        }
        final int key = slots[slot];
        for (int s = 0; s < setCount; ++s) {
            if (hashSets[s].contains(key)) {
                result[s].set(bit);
            }
        }
        ++bit;
    }

    return result;
}
 
Example 4
Source Project: arx | Source File: GeneralizationHierarchy.java | License: Apache License 2.0 | Votes: 6
/**
 * Returns the distinct values found at the given level.
 * <p>
 * Reads {@code map[k][level]} for every row {@code k}, de-duplicates the
 * values with a hash set and copies the set's allocated slots into a new
 * array. The order of the result follows the slot order of that hash set.
 *
 * @param level
 *            the index read from every row of {@code map}
 * @return a new array holding each distinct value once
 */
public int[] getDistinctValues(final int level) {

    final IntOpenHashSet distinct = new IntOpenHashSet();
    for (int row = 0; row < map.length; row++) {
        distinct.add(map[row][level]);
    }

    final int[] values = new int[distinct.size()];
    final int[] slots = distinct.keys;
    final boolean[] occupied = distinct.allocated;
    int next = 0;
    for (int slot = 0; slot < occupied.length; slot++) {
        if (!occupied[slot]) {
            continue;
        }
        values[next] = slots[slot];
        next++;
    }
    return values;
}
 
Example 5
Source Project: jopenfst | Source File: Connect.java | License: MIT License | Votes: 5
/**
 * Records {@code state} in {@code coaccessible} and recursively does the same
 * for every incoming state whose id is not yet in the set.
 *
 * @param state the state to start from
 * @param coaccessible the set of state ids visited so far; extended in place
 */
private static void dfsBackward(MutableState state, IntOpenHashSet coaccessible) {
  coaccessible.add(state.getId());
  for (MutableState predecessor : state.getIncomingStates()) {
    if (coaccessible.contains(predecessor.getId())) {
      continue;
    }
    dfsBackward(predecessor, coaccessible);
  }
}
 
Example 6
Source Project: jopenfst | Source File: Connect.java | License: MIT License | Votes: 5
/**
 * Records {@code start} in {@code accessible} and recursively does the same
 * for the target state of every outgoing arc whose id is not yet in the set.
 *
 * @param start the state to start from
 * @param accessible the set of state ids visited so far; extended in place
 */
private static void dfsForward(MutableState start, IntOpenHashSet accessible) {
  accessible.add(start.getId());
  for (MutableArc outgoing : start.getArcs()) {
    MutableState target = outgoing.getNextState();
    if (accessible.contains(target.getId())) {
      continue;
    }
    dfsForward(target, accessible);
  }
}
 
Example 7
/**
 * Returns, for each requested word, the ids of the documents containing it.
 * <p>
 * {@code words} is split with {@code WORD_SEPARATOR}. The response body is a
 * sequence of ints (4 bytes each, in the buffer's default byte order): for
 * every word, the number of document ids followed by the ids themselves.
 * If {@code luceneAdapter} is not a {@code BooleanDocumentSupportingAdapter},
 * an empty {@code NOT_IMPLEMENTED} response is returned.
 *
 * @param words
 *            the requested words, joined by {@code WORD_SEPARATOR}
 * @return the encoded document ids with status {@code OK}, or
 *         {@code NOT_IMPLEMENTED}
 */
@RequestMapping(value = "df")
public ResponseEntity<byte[]> requestDocFreq(@RequestParam(value = "words") String words) {
    if (!(luceneAdapter instanceof BooleanDocumentSupportingAdapter)) {
        return new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED);
    }

    final String[] terms = words.split(WORD_SEPARATOR);
    final IntOpenHashSet docIds = new IntOpenHashSet();
    final IntBuffer[] idsPerWord = new IntBuffer[terms.length];
    int totalBytes = 0;
    for (int w = 0; w < terms.length; ++w) {
        // The same set is reused for every word.
        docIds.clear();
        ((BooleanDocumentSupportingAdapter) luceneAdapter).getDocumentsWithWordAsSet(terms[w], docIds);
        // One int for the count plus one int per document id.
        totalBytes += (4 * docIds.size()) + 4;
        final IntBuffer ids = IntBuffer.allocate(docIds.size());
        idsPerWord[w] = ids;
        if (docIds.size() > 0) {
            for (int slot = 0; slot < docIds.keys.length; ++slot) {
                if (docIds.allocated[slot]) {
                    ids.put(docIds.keys[slot]);
                }
            }
        }
    }

    final ByteBuffer payload = ByteBuffer.allocate(totalBytes);
    final IntBuffer out = payload.asIntBuffer();
    for (IntBuffer ids : idsPerWord) {
        out.put(ids.capacity());
        out.put(ids.array());
    }
    return new ResponseEntity<byte[]>(payload.array(), HttpStatus.OK);
}
 
Example 8
/**
 * Calls {@code getDocumentsWithWordAsSet} for every allocated entry of the
 * given map, passing the entry's key as the word and its value as the set
 * that receives the document ids.
 *
 * @param wordDocMapping
 *            the mapping of words to the sets to be filled
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] docSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            // Empty slot of the map's backing arrays.
            continue;
        }
        getDocumentsWithWordAsSet((String) words[slot], (IntOpenHashSet) docSets[slot]);
    }
}
 
Example 9
/**
 * Looks up the document set of every word in {@code wordset}, merges them
 * into one union set and delegates to
 * {@code createBitSets(IntOpenHashSet[], IntOpenHashSet)}.
 *
 * @param wordDocMapping
 *            the mapping of words to document id sets
 * @param wordset
 *            the words whose document sets are converted
 * @return one bit set per word of {@code wordset}
 */
private BitSet[] createBitSets(
        ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    final IntOpenHashSet[] docsPerWord = new IntOpenHashSet[wordset.length];
    final IntOpenHashSet union = new IntOpenHashSet();
    for (int w = 0; w < docsPerWord.length; ++w) {
        final IntOpenHashSet docs = wordDocMapping.get(wordset[w]);
        docsPerWord[w] = docs;
        union.addAll(docs);
    }
    return createBitSets(docsPerWord, union);
}
 
Example 10
/**
 * Checks that the definition produced by {@code subsetCreator} for the given
 * word set size matches the expected segments and conditions.
 * <p>
 * The expectation is built with {@code createSets}. The {@code BitSet} of
 * needed counts is passed to {@code createSets} but is not compared here.
 *
 * @param wordsetSize
 *            the word set size handed to the segmentator
 * @param subsetCreator
 *            the segmentator under test
 * @param expectedSegments
 *            the expected segments
 * @param expectedConditions
 *            the expected conditions
 */
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
        int expectedSegments[], int expectedConditions[][]) {
    final IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
    final BitSet neededCounts = new BitSet();
    createSets(expectedSegments, expectedConditions, expectedMapping, neededCounts);

    compare(subsetCreator.getSubsetDefinition(wordsetSize), expectedMapping);
}
 
Example 11
/**
 * Asserts that the given definition matches the expected mapping of
 * segments to condition sets.
 * <p>
 * The number of segments must equal the number of mapping entries. Every
 * segment must be a key of the mapping. For every segment, the number of
 * conditions must equal the size of the expected condition set, and every
 * condition must be contained in that set. Needed counts are not compared.
 *
 * @param definition
 *            the definition under test
 * @param segmentToConditionMapping
 *            the expected condition set for every expected segment
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        final IntOpenHashSet conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);
        // Compare the sizes once per segment, outside the loop over the
        // conditions: inside that loop the check would be skipped whenever
        // the definition has no conditions for the segment, hiding the case
        // where conditions were expected but none were produced.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);
        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }
}
 
Example 12
/**
 * Fills the set of every allocated map entry from {@code wordDocuments}.
 * <p>
 * Each key is parsed as an integer and used as an index into
 * {@code wordDocuments}; the element found there is added to the entry's
 * set. A key that is not a valid integer makes
 * {@link Integer#parseInt(String)} throw.
 *
 * @param wordDocMapping
 *            the mapping of words (integer indices as strings) to the sets
 *            to be filled
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] docSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue;
        }
        final IntOpenHashSet target = (IntOpenHashSet) docSets[slot];
        final int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}
 
Example 13
/**
 * Returns the set of class ids held by this object. The set itself is
 * returned, not a copy.
 *
 * @return the class id set
 */
public IntOpenHashSet getClassIds() {
    return this.classIds;
}
 
Example 14
/**
 * Replaces the set of class ids held by this object. The given set is
 * stored as is, not copied.
 *
 * @param classIds
 *            the new class id set
 */
public void setClassIds(final IntOpenHashSet classIds) {
    this.classIds = classIds;
}
 
Example 15
Source Project: jopenfst | Source File: NShortestPaths.java | License: MIT License | Votes: 4
/**
 * Calculates the shortest distances from each state to the final.
 *
 * The computation runs on the reversed fst, starting from its start state,
 * and keeps a queue of states whose outgoing arcs still have to be relaxed.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances
 * @return the array containing the shortest distances
 */
private static double[] shortestDistance(Fst fst) {

  final Fst reversed = Reverse.reverse(fst);
  final Semiring semiring = reversed.getSemiring();

  final double[] distance = new double[reversed.getStateCount()];
  final double[] residual = new double[reversed.getStateCount()];
  for (int s = 0; s < distance.length; s++) {
    distance[s] = semiring.zero();
    residual[s] = semiring.zero();
  }

  final IntObjectOpenHashMap<State> statesById = new IntObjectOpenHashMap<>();
  final Deque<Integer> pending = new LinkedList<>();
  final IntOpenHashSet pendingIds = new IntOpenHashSet();

  final State startState = reversed.getStartState();
  final int startId = startState.getId();
  pending.addLast(startId);
  statesById.put(startId, startState);
  distance[startId] = semiring.one();
  residual[startId] = semiring.one();

  while (!pending.isEmpty()) {
    final int currentId = pending.removeFirst();
    pendingIds.remove(currentId);
    final State current = statesById.get(currentId);
    // Take the weight accumulated for this state since it was last
    // processed and reset it before relaxing the outgoing arcs.
    final double carried = residual[current.getId()];
    residual[current.getId()] = semiring.zero();

    for (int a = 0; a < current.getArcCount(); a++) {
      final Arc arc = current.getArc(a);
      final State target = arc.getNextState();
      final int targetId = target.getId();
      final double oldDistance = distance[targetId];
      final double newDistance = semiring.plus(oldDistance, semiring.times(carried, arc.getWeight()));
      if (oldDistance == newDistance) {
        continue;
      }
      distance[targetId] = newDistance;
      residual[targetId] = semiring.plus(residual[targetId], semiring.times(carried, arc.getWeight()));
      // Enqueue the target only if it is not already waiting in the queue.
      if (!pendingIds.contains(targetId)) {
        pending.addLast(targetId);
        pendingIds.add(targetId);
        statesById.put(targetId, target);
      }
    }
  }
  return distance;
}
 
Example 16
/**
 * Parses {@code word} as an integer index into {@code wordDocuments} and
 * adds the element found there to {@code documents}. A word that is not a
 * valid integer makes {@link Integer#parseInt(String)} throw.
 *
 * @param word
 *            the word, given as an integer index in string form
 * @param documents
 *            the set that receives the document ids
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final int wordIndex = Integer.parseInt(word);
    documents.add(wordDocuments[wordIndex]);
}
 
Example 17
/**
 * Determines the documents containing the words used as keys in the given
 * map. For every word, the ids of the documents containing it are stored in
 * the set that the map associates with that word.
 * <p>
 * NOTE(review): presumably every key must already be mapped to a non-null
 * set before this method is called - confirm against the implementations.
 *
 * @param wordDocMapping
 *            a mapping of words to the sets in which the document ids are
 *            stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);
 
Example 18
/**
 * Determines the documents containing the given word. The ids of the found
 * documents are inserted into the given set.
 * <p>
 * NOTE(review): whether existing content of the set is kept or cleared is
 * not stated here - confirm against the implementations.
 *
 * @param word
 *            the word which should be searched
 * @param documents
 *            the set in which the document ids will be stored
 */
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents);