com.carrotsearch.hppc.IntOpenHashSet Java Examples

The following examples show how to use com.carrotsearch.hppc.IntOpenHashSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GeneralizationHierarchy.java    From arx with Apache License 2.0 6 votes vote down vote up
/**
 * Collects every distinct value found at the given level of {@code map}.
 *
 * @param level the second-dimension index read from each row of {@code map}
 * @return an array containing each distinct value exactly once, ordered by
 *         the slot order of the hash set's backing storage
 */
public int[] getDistinctValues(final int level) {

    // Deduplicate the requested column across all rows.
    final IntOpenHashSet distinct = new IntOpenHashSet();
    for (int row = 0; row < map.length; row++) {
        distinct.add(map[row][level]);
    }

    // Copy the occupied slots of the set's backing arrays into a dense array.
    final int[] result = new int[distinct.size()];
    final boolean[] occupied = distinct.allocated;
    final int[] slots = distinct.keys;
    int filled = 0;
    for (int slot = 0; slot < occupied.length; slot++) {
        if (!occupied[slot]) {
            continue;
        }
        result[filled] = slots[slot];
        filled++;
    }
    return result;
}
 
Example #2
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Creates one bit set per entry of {@code hashSets}. Every element of
 * {@code mergedHashSet} is assigned a consecutive bit position (in the slot
 * order of the merged set's backing array); the bit at that position is set
 * in the i-th bit set if the i-th hash set contains the element.
 *
 * @param hashSets      the sets to be encoded as bit sets
 * @param mergedHashSet the set whose elements define the bit positions
 * @return an array of bit sets parallel to {@code hashSets}
 */
private BitSet[] createBitSets(IntOpenHashSet hashSets[],
        IntOpenHashSet mergedHashSet) {
    BitSet[] encoded = new BitSet[hashSets.length];
    for (int s = 0; s < encoded.length; ++s) {
        encoded[s] = new BitSet(mergedHashSet.size());
    }

    int bitIndex = 0;
    for (int slot = 0; slot < mergedHashSet.keys.length; slot++) {
        // Skip empty slots of the open-addressing table.
        if (!mergedHashSet.allocated[slot]) {
            continue;
        }
        for (int s = 0; s < encoded.length; ++s) {
            if (hashSets[s].contains(mergedHashSet.keys[slot])) {
                encoded[s].set(bitIndex);
            }
        }
        bitIndex++;
    }

    return encoded;
}
 
Example #3
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Determines the counted subsets for the given word sets.
 *
 * First, an empty document set is registered for every word occurring in
 * {@code wordsets}; the corpus adapter is then asked to fill these sets.
 * Finally one {@code CountedSubsets} object is created per definition from
 * the bit sets of the word set at the same index.
 *
 * @param wordsets    the word sets; {@code wordsets[i]} is paired with
 *                    {@code definitions[i]}
 * @param definitions the segmentation definitions
 * @return one {@code CountedSubsets} instance per definition
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping = new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (wordDocMapping.containsKey(word)) {
                continue;
            }
            wordDocMapping.put(word, new IntOpenHashSet());
        }
    }

    corpusAdapter.getDocumentsWithWordsAsSet(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int d = 0; d < definitions.length; ++d) {
        SegmentationDefinition definition = definitions[d];
        result[d] = new CountedSubsets(definition.segments,
                definition.conditions,
                createCounts(createBitSets(wordDocMapping, wordsets[d])));
    }
    return result;
}
 
Example #4
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Adds the ids of all documents containing the given word to
 * {@code documents}. Each reader is queried separately and the per-reader
 * document ids are shifted by the {@code docBase} of the matching context.
 * An {@link IOException} is logged and ends the lookup; ids added before the
 * failure remain in the set.
 *
 * @param word      the word to look up in the field {@code fieldName}
 * @param documents the set receiving the document ids
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final Term term = new Term(fieldName, word);
    try {
        for (int segment = 0; segment < reader.length; segment++) {
            final DocsEnum postings = reader[segment].termDocsEnum(term);
            final int docBase = contexts[segment].docBase;
            if (postings == null) {
                // No postings returned for the term by this reader.
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                documents.add(docBase + postings.docID());
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #5
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Fills the document set of every word used as key in the given map by
 * delegating to {@code getDocumentsWithWordAsSet} for each occupied slot of
 * the map's backing arrays.
 *
 * @param wordDocMapping mapping from word to the set receiving its documents
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue;
        }
        final String word = (String) words[slot];
        final IntOpenHashSet documents = (IntOpenHashSet) documentSets[slot];
        getDocumentsWithWordAsSet(word, documents);
    }
}
 
Example #6
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test implementation: every key of the map is parsed as an integer index
 * into {@code wordDocuments}, and the entry found there is added to the set
 * stored under that key.
 *
 * @param wordDocMapping mapping from word (a decimal index) to the set
 *                       receiving its documents
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    for (int slot = 0; slot < wordDocMapping.allocated.length; ++slot) {
        if (!wordDocMapping.allocated[slot]) {
            continue;
        }
        final IntOpenHashSet target = (IntOpenHashSet) documentSets[slot];
        final int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}
 
Example #7
Source File: AbstractSegmentatorTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Asserts that the given definition matches the expected mapping of segments
 * to condition sets.
 *
 * Checks that the number of segments is equal, that every segment of the
 * definition is a key of the expected mapping, that the number of conditions
 * of each segment matches the size of the expected condition set, and that
 * every condition of the definition is contained in that set.
 *
 * The condition count is verified once per segment, before the individual
 * conditions are inspected, so a segment whose definition has no conditions
 * is still compared against the expected set size.
 *
 * @param definition                the definition produced by the code under test
 * @param segmentToConditionMapping the expected conditions for each segment
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        final IntOpenHashSet conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);

        // Loop-invariant check: run it once per segment, outside the loop
        // over the conditions, so it also fires for an empty condition array.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);

        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }
}
 
Example #8
Source File: AbstractSegmentatorTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the given segmentator for a word set of the given size and compares
 * the resulting definition with the expected segments and conditions.
 *
 * @param wordsetSize        the size passed to the segmentator
 * @param subsetCreator      the segmentator under test
 * @param expectedSegments   the expected segments
 * @param expectedConditions the expected conditions, passed to
 *                           {@code createSets} together with the segments
 */
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
            int expectedSegments[], int expectedConditions[][]) {
        // Build the expected mapping; the bit set is only filled by
        // createSets and is not used for the comparison.
        final IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
        createSets(expectedSegments, expectedConditions, expectedMapping,
                new BitSet());

        compare(subsetCreator.getSubsetDefinition(wordsetSize), expectedMapping);
    }
 
Example #9
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the document set of every word of the given word set, merges
 * them into a single set, and delegates to the overload taking the
 * individual sets and their union.
 *
 * @param wordDocMapping mapping from word to the set of its documents
 * @param wordset        the words for which bit sets should be created
 * @return one bit set per word of {@code wordset}
 */
private BitSet[] createBitSets(
        ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    final IntOpenHashSet[] perWord = new IntOpenHashSet[wordset.length];
    final IntOpenHashSet union = new IntOpenHashSet();
    for (int w = 0; w < perWord.length; ++w) {
        final IntOpenHashSet documents = wordDocMapping.get(wordset[w]);
        perWord[w] = documents;
        union.addAll(documents);
    }
    return createBitSets(perWord, union);
}
 
Example #10
Source File: PalmettoApplication.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the ids of the documents containing each of the requested words
 * as a binary payload. For every word, in request order, the payload holds
 * the number of document ids as one int followed by the ids themselves.
 *
 * Responds with {@code NOT_IMPLEMENTED} if the configured adapter is not a
 * {@code BooleanDocumentSupportingAdapter}.
 *
 * @param words the requested words, joined by {@code WORD_SEPARATOR}
 * @return the binary response, or an empty {@code NOT_IMPLEMENTED} response
 */
@RequestMapping(value = "df")
public ResponseEntity<byte[]> requestDocFreq(@RequestParam(value = "words") String words) {
    if (!(luceneAdapter instanceof BooleanDocumentSupportingAdapter)) {
        return new ResponseEntity<>(HttpStatus.NOT_IMPLEMENTED);
    }
    final BooleanDocumentSupportingAdapter adapter = (BooleanDocumentSupportingAdapter) luceneAdapter;

    final String[] requestedWords = words.split(WORD_SEPARATOR);
    final IntBuffer[] idsPerWord = new IntBuffer[requestedWords.length];
    // A single set is reused for all words and emptied before each lookup.
    final IntOpenHashSet documentIds = new IntOpenHashSet();
    int totalBytes = 0;
    for (int w = 0; w < requestedWords.length; ++w) {
        documentIds.clear();
        adapter.getDocumentsWithWordAsSet(requestedWords[w], documentIds);

        // Four bytes per id plus four bytes for the leading count.
        totalBytes += (4 * documentIds.size()) + 4;
        final IntBuffer ids = IntBuffer.allocate(documentIds.size());
        idsPerWord[w] = ids;
        if (documentIds.size() <= 0) {
            continue;
        }
        for (int slot = 0; slot < documentIds.keys.length; ++slot) {
            if (documentIds.allocated[slot]) {
                ids.put(documentIds.keys[slot]);
            }
        }
    }

    final ByteBuffer payload = ByteBuffer.allocate(totalBytes);
    final IntBuffer payloadInts = payload.asIntBuffer();
    for (IntBuffer ids : idsPerWord) {
        payloadInts.put(ids.capacity());
        payloadInts.put(ids.array());
    }
    return new ResponseEntity<byte[]>(payload.array(), HttpStatus.OK);
}
 
Example #11
Source File: Connect.java    From jopenfst with MIT License 5 votes vote down vote up
/**
 * Depth-first traversal along outgoing arcs. Records the id of
 * {@code start} and of every state reachable from it in {@code accessible};
 * states whose id is already recorded are not visited again.
 *
 * @param start      the state to start the traversal from
 * @param accessible the set collecting the ids of the visited states
 */
private static void dfsForward(MutableState start, IntOpenHashSet accessible) {
  accessible.add(start.getId());
  for (MutableArc outgoing : start.getArcs()) {
    MutableState target = outgoing.getNextState();
    if (accessible.contains(target.getId())) {
      continue;
    }
    dfsForward(target, accessible);
  }
}
 
Example #12
Source File: Connect.java    From jopenfst with MIT License 5 votes vote down vote up
/**
 * Depth-first traversal along incoming states. Records the id of
 * {@code state} and of every state from which it can be reached in
 * {@code coaccessible}; states whose id is already recorded are not visited
 * again.
 *
 * @param state        the state to start the traversal from
 * @param coaccessible the set collecting the ids of the visited states
 */
private static void dfsBackward(MutableState state, IntOpenHashSet coaccessible) {
  coaccessible.add(state.getId());
  for (MutableState predecessor : state.getIncomingStates()) {
    if (coaccessible.contains(predecessor.getId())) {
      continue;
    }
    dfsBackward(predecessor, coaccessible);
  }
}
 
Example #13
Source File: ClassifiedClassNode.java    From gerbil with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns the class ids of this node.
 *
 * @return the {@code classIds} field itself, not a copy
 */
public IntOpenHashSet getClassIds() {
    return this.classIds;
}
 
Example #14
Source File: NShortestPaths.java    From jopenfst with MIT License 4 votes vote down vote up
/**
 * Computes, for every state of the given fst, the shortest distance to the
 * final state. The computation runs a queue-based relaxation on the
 * reversed fst, starting at the reversed fst's start state.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances
 * @return the array containing the shortest distances, indexed by state id
 */
private static double[] shortestDistance(Fst fst) {

  Fst reversed = Reverse.reverse(fst);

  // d holds the distance found so far; r holds the weight added to a state
  // since it was last taken from the queue.
  double[] d = new double[reversed.getStateCount()];
  double[] r = new double[reversed.getStateCount()];

  Semiring semiring = reversed.getSemiring();

  for (int s = 0; s < d.length; s++) {
    d[s] = semiring.zero();
    r[s] = semiring.zero();
  }

  IntObjectOpenHashMap<State> stateMap = new IntObjectOpenHashMap<>();
  Deque<Integer> queue = new LinkedList<>();
  IntOpenHashSet enqueuedStateIds = new IntOpenHashSet();

  State startState = reversed.getStartState();
  int startId = startState.getId();
  queue.addLast(startId);
  stateMap.put(startId, startState);

  d[startId] = semiring.one();
  r[startId] = semiring.one();

  while (!queue.isEmpty()) {
    int currentId = queue.removeFirst();
    enqueuedStateIds.remove(currentId);
    State current = stateMap.get(currentId);
    int currentIndex = current.getId();
    double pending = r[currentIndex];
    r[currentIndex] = semiring.zero();

    for (int a = 0; a < current.getArcCount(); a++) {
      Arc arc = current.getArc(a);
      State target = arc.getNextState();
      int targetId = target.getId();
      double contribution = semiring.times(pending, arc.getWeight());
      double before = d[targetId];
      double after = semiring.plus(before, contribution);
      if (before == after) {
        // The distance did not change; nothing to propagate.
        continue;
      }
      d[targetId] = after;
      r[targetId] = semiring.plus(r[targetId], contribution);
      if (!enqueuedStateIds.contains(targetId)) {
        queue.addLast(targetId);
        enqueuedStateIds.add(targetId);
        stateMap.put(targetId, target);
      }
    }
  }
  return d;
}
 
Example #15
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java    From Palmetto with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Test implementation: parses the word as an integer index into
 * {@code wordDocuments} and adds the entry found there to the given set.
 *
 * @param word      the word, expected to be a decimal index
 * @param documents the set receiving the documents
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
    final int wordIndex = Integer.parseInt(word);
    documents.add(wordDocuments[wordIndex]);
}
 
Example #16
Source File: ClassifiedClassNode.java    From gerbil with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Replaces the class ids of this node.
 *
 * @param classIds the new set of class ids; the reference is stored as
 *                 given, without copying
 */
public void setClassIds(IntOpenHashSet classIds) {
    this.classIds = classIds;
}
 
Example #17
Source File: BooleanDocumentSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Determines the documents containing the given word. The ids of the found
 * documents are inserted into the given set.
 *
 * NOTE(review): whether ids already present in the set are kept is up to
 * the implementation; confirm before reusing a set across calls.
 *
 * @param word
 *            the word which should be searched
 * @param documents
 *            the set in which the document ids will be stored
 */
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents);
 
Example #18
Source File: BooleanDocumentSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Determines the documents containing the words used as keys in the given
 * map. The ids of the found documents are inserted into the set stored
 * under the respective word.
 *
 * NOTE(review): presumably every key must already be mapped to a non-null
 * set before this method is called; confirm against the implementations.
 *
 * @param wordDocMapping
 *            a mapping of words to documents in which the results are
 *            stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);