com.carrotsearch.hppc.IntObjectOpenHashMap Java Examples

The following examples show how to use com.carrotsearch.hppc.IntObjectOpenHashMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractSymbolTable.java    From jopenfst with MIT License 5 votes vote down vote up
protected AbstractSymbolTable(SymbolTable copyFrom) {

    this.symbolToId = new ObjectIntOpenHashMap<>(copyFrom.size());
    this.idToSymbol = new IntObjectOpenHashMap<>(copyFrom.size());
    for (ObjectIntCursor<String> cursor : copyFrom) {
      symbolToId.put(cursor.key, cursor.value);
      idToSymbol.put(cursor.value, cursor.key);
    }
  }
 
Example #2
Source File: AbstractVectorBasedCalculation.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Calculates one similarity value for every (segment, condition) pair of the given subset vectors.
 * The values are written in iteration order: all conditions of the first segment, then all
 * conditions of the second segment, and so on.
 *
 * @param subsetVectors the segments, their conditions and the base vectors
 * @return an array with one similarity value per condition, summed over all segments
 */
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    // The result needs one slot per condition of every segment.
    int resultLength = 0;
    for (int s = 0; s < subsetVectors.segments.length; ++s) {
        resultLength += subsetVectors.conditions[s].length;
    }
    double[] results = new double[resultLength];

    // Seed the cache with the base vectors; the vector at index k is stored under key (1 << k).
    IntObjectOpenHashMap<double[]> cache = new IntObjectOpenHashMap<double[]>();
    for (int k = 0; k < subsetVectors.vectors.length; ++k) {
        cache.put(1 << k, subsetVectors.vectors[k]);
    }

    int next = 0;
    for (int s = 0; s < subsetVectors.segments.length; ++s) {
        final int segment = subsetVectors.segments[s];
        double[] segmentVector;
        if (!cache.containsKey(segment)) {
            // Not cached yet: build the vector once and remember it.
            segmentVector = createVector(segment, subsetVectors.vectors);
            cache.put(segment, segmentVector);
        } else {
            // lget() hands back the value found by the containsKey() call just above.
            segmentVector = cache.lget();
        }

        for (int condition : subsetVectors.conditions[s]) {
            double[] conditionVector;
            if (!cache.containsKey(condition)) {
                conditionVector = createVector(condition, subsetVectors.vectors);
                cache.put(condition, conditionVector);
            } else {
                conditionVector = cache.lget();
            }
            results[next] = calculateSimilarity(segmentVector, conditionVector);
            ++next;
        }
    }
    return results;
}
 
Example #3
Source File: CentroidConfirmationMeasure.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Calculates, for every segment, the similarity between the segment's vector and the centroid
 * of the base vectors.
 *
 * @param subsetVectors the segments and the base vectors
 * @return an array with one similarity value per segment
 */
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    double[] results = new double[subsetVectors.segments.length];

    // The centroid has the dimensionality of the first base vector.
    double[] centroid = new double[subsetVectors.vectors[0].length];
    IntObjectOpenHashMap<double[]> cache = new IntObjectOpenHashMap<double[]>();
    for (int k = 0; k < subsetVectors.vectors.length; ++k) {
        // The base vector at index k is cached under key (1 << k) ...
        cache.put(1 << k, subsetVectors.vectors[k]);
        // ... and added component-wise to the centroid.
        for (int d = 0; d < centroid.length; d++) {
            centroid[d] += subsetVectors.vectors[k][d];
        }
    }
    // Note: the centroid stays the plain component-wise sum; the division by the number of
    // vectors is disabled:
    // for (int j = 0; j < centroid.length; j++) {
    // centroid[j] /= subsetVectors.vectors.length;
    // }

    for (int s = 0; s < results.length; ++s) {
        final int segment = subsetVectors.segments[s];
        double[] segmentVector;
        if (!cache.containsKey(segment)) {
            // Not cached yet: build the vector once and remember it.
            segmentVector = createVector(segment, subsetVectors.vectors);
            cache.put(segment, segmentVector);
        } else {
            // lget() hands back the value found by the containsKey() call just above.
            segmentVector = cache.lget();
        }
        results[s] = calculateSimilarity(segmentVector, centroid);
    }
    return results;
}
 
Example #4
Source File: WindowSupportingLuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Collects the positions of all given words by querying the words one after another.
 *
 * @param words      the words to look up; the index of a word inside this array is used as its id
 * @param docLengths map handed on to {@code requestDocumentsWithWord} for every word
 * @return the map filled by the per-word requests
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    final int numberOfWords = words.length;
    int wordId = 0;
    while (wordId < numberOfWords) {
        requestDocumentsWithWord(words[wordId], result, docLengths, wordId, numberOfWords);
        ++wordId;
    }
    return result;
}
 
Example #5
Source File: AbstractWindowBasedFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Determines the counts for the given word set by visiting every document returned by the
 * corpus adapter.
 *
 * @param wordset the words to count
 * @return an array of {@code 1 << wordset.length} counts, filled by {@code addCountsFromDocument}
 */
protected int[] determineCounts(String wordset[]) {
    int[] counts = new int[(1 << wordset.length)];
    IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = corpusAdapter.requestWordPositionsInDocuments(wordset,
            docLengths);
    // Walk the map's backing arrays directly; only slots flagged as allocated hold an entry.
    for (int slot = 0; slot < positionsInDocs.keys.length; ++slot) {
        if (!positionsInDocs.allocated[slot]) {
            continue;
        }
        IntArrayList[] positions = (IntArrayList[]) ((Object[]) positionsInDocs.values)[slot];
        int docId = positionsInDocs.keys[slot];
        addCountsFromDocument(positions, counts, docLengths.get(docId));
    }
    return counts;
}
 
Example #6
Source File: AbstractSegmentatorTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that the given segmentator produces the expected segments and conditions for a word set
 * of the given size.
 *
 * @param wordsetSize        the size of the word set handed to the segmentator
 * @param subsetCreator      the segmentator under test
 * @param expectedSegments   the expected segments
 * @param expectedConditions the expected conditions, passed to {@code createSets} together with the segments
 */
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
        int expectedSegments[], int expectedConditions[][]) {
    // Turn the raw expectation arrays into a segment -> conditions lookup.
    IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
    BitSet expectedNeededCounts = new BitSet();
    createSets(expectedSegments, expectedConditions, expectedMapping, expectedNeededCounts);

    SegmentationDefinition actual = subsetCreator.getSubsetDefinition(wordsetSize);

    // Only segments and conditions are compared; the needed counts filled by createSets
    // are currently not checked.
    compare(actual, expectedMapping);
}
 
Example #7
Source File: AbstractSegmentatorTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Asserts that the given definition matches the expected segment-to-condition mapping.
 * <p>
 * The check fails if the number of segments differs, if a segment is not a key of the mapping,
 * if the number of conditions of a segment differs from the expected number, or if a condition
 * is not contained in the expected condition set of its segment.
 *
 * @param definition                the definition produced by the code under test
 * @param segmentToConditionMapping the expected conditions, keyed by segment
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    IntOpenHashSet conditionSet;

    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);
        // Compare the number of conditions once per segment and before looking at the individual
        // conditions. Inside the condition loop this check would be skipped whenever the
        // definition has no conditions for the segment, hiding missing conditions.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);
        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }

    // The needed counts of the definition are not compared here:
    // Assert.assertEquals(neededCounts, definition.neededCounts);
}
 
Example #8
Source File: BooleanSlidingWindowProbabilitySupplierTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if (positions[i].length > 0) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example #9
Source File: ContextWindowFrequencyDeterminerCountingTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test fixture: exposes the positions held by this test as a single document with id 0.
 *
 * @param words      not read by this implementation
 * @param docLengths receives the length of document 0
 * @return a map with exactly one entry (key 0) holding one position list per entry of
 *         {@code positions}; null or empty entries stay {@code null}
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    final int entryCount = positions.length;
    IntArrayList[] listsForDocument = new IntArrayList[entryCount];
    for (int idx = 0; idx < entryCount; ++idx) {
        boolean hasPositions = (positions[idx] != null) && (positions[idx].length > 0);
        if (hasPositions) {
            IntArrayList list = new IntArrayList();
            list.add(positions[idx]);
            listsForDocument[idx] = list;
        }
    }

    IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    result.put(0, listsForDocument);
    docLengths.put(0, docLength);
    return result;
}
 
Example #10
Source File: BooleanSlidingWindowFrequencyDeterminerCountingTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test fixture: wraps the positions held by this test into a single document with id 0.
 *
 * @param words      not read by this implementation
 * @param docLengths receives the length of document 0
 * @return a map with exactly one entry (key 0) holding one position list per entry of
 *         {@code positions}; null or empty entries stay {@code null}
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> byDocument = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] lists = new IntArrayList[positions.length];
    int slot = 0;
    while (slot < lists.length) {
        // Skip entries that are missing or carry no positions; their slot stays null.
        if ((positions[slot] == null) || (positions[slot].length <= 0)) {
            ++slot;
            continue;
        }
        lists[slot] = new IntArrayList();
        lists[slot].add(positions[slot]);
        ++slot;
    }
    byDocument.put(0, lists);
    docLengths.put(0, docLength);
    return byDocument;
}
 
Example #11
Source File: NShortestPaths.java    From jopenfst with MIT License 4 votes vote down vote up
/**
 * Calculates the shortest distances from each state to the final.
 *
 * The distances are computed on the reversed fst with a queue-based relaxation that starts at
 * the reversed fst's start state.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances
 * @return the array containing the shortest distances
 */
private static double[] shortestDistance(Fst fst) {

  final Fst reversed = Reverse.reverse(fst);
  final int stateCount = reversed.getStateCount();

  // distance[s]: best distance known so far; pending[s]: weight added to s since it was last processed
  final double[] distance = new double[stateCount];
  final double[] pending = new double[stateCount];

  final Semiring semiring = reversed.getSemiring();

  for (int s = 0; s < distance.length; s++) {
    distance[s] = semiring.zero();
    pending[s] = semiring.zero();
  }

  final IntObjectOpenHashMap<State> statesById = new IntObjectOpenHashMap<>();
  final Deque<Integer> worklist = new LinkedList<>();
  final IntOpenHashSet queuedIds = new IntOpenHashSet();

  final State start = reversed.getStartState();
  final int startId = start.getId();
  worklist.addLast(startId);
  statesById.put(startId, start);

  distance[startId] = semiring.one();
  pending[startId] = semiring.one();

  while (!worklist.isEmpty()) {
    final int currentId = worklist.removeFirst();
    queuedIds.remove(currentId);
    final State current = statesById.get(currentId);
    final int currentIndex = current.getId();
    final double carried = pending[currentIndex];
    pending[currentIndex] = semiring.zero();

    for (int a = 0; a < current.getArcCount(); a++) {
      final Arc arc = current.getArc(a);
      final State target = arc.getNextState();
      final int targetId = target.getId();
      final double contribution = semiring.times(carried, arc.getWeight());
      final double known = distance[targetId];
      final double relaxed = semiring.plus(known, contribution);
      if (known == relaxed) {
        // Nothing changed for the target, so it does not need to be revisited.
        continue;
      }
      distance[targetId] = relaxed;
      pending[targetId] = semiring.plus(pending[targetId], contribution);
      if (!queuedIds.contains(targetId)) {
        worklist.addLast(targetId);
        queuedIds.add(targetId);
        statesById.put(targetId, target);
      }
    }
  }
  return distance;
}
 
Example #12
Source File: AbstractSymbolTable.java    From jopenfst with MIT License 4 votes vote down vote up
/** Creates a symbol table whose symbol-to-id and id-to-symbol lookups are both empty. */
protected AbstractSymbolTable() {
  idToSymbol = new IntObjectOpenHashMap<>();
  symbolToId = new ObjectIntOpenHashMap<>();
}
 
Example #13
Source File: WindowSupportingLuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Adds the positions of a single word to {@code positionsInDocs} for every document, across all
 * readers, that contains the word, and records the length of each such document.
 * An {@link IOException} raised while reading is logged and ends the request.
 *
 * @param word            the word to look up
 * @param positionsInDocs map from global document id to one position list per word; missing
 *                        entries are created with {@code numberOfWords} slots
 * @param docLengths      map from global document id to document length; only missing ids are added
 * @param wordId          the slot of this word inside the per-document arrays
 * @param numberOfWords   the length used for newly created per-document arrays
 */
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    final Term term = new Term(fieldName, word);
    try {
        for (int r = 0; r < reader.length; r++) {
            DocsAndPositionsEnum postings = reader[r].termPositionsEnum(term);
            // Local document ids are shifted by the reader's doc base to obtain global ids.
            final int docBase = contexts[r].docBase;
            if (postings == null) {
                continue;
            }
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                final int localId = postings.docID();
                final int globalId = localId + docBase;

                // Fetch the per-word lists of this document, creating them on first sight.
                IntArrayList[] perWord;
                if (positionsInDocs.containsKey(globalId)) {
                    perWord = positionsInDocs.get(globalId);
                } else {
                    perWord = new IntArrayList[numberOfWords];
                    positionsInDocs.put(globalId, perWord);
                }
                if (perWord[wordId] == null) {
                    perWord[wordId] = new IntArrayList();
                }

                // Copy every position of the word inside this document.
                for (int p = 0; p < postings.freq(); ++p) {
                    perWord[wordId].add(postings.nextPosition());
                }

                // The document length is read from the stored length field only once per document.
                if (!docLengths.containsKey(globalId)) {
                    docLengths.put(globalId, reader[r].document(localId).getField(docLengthFieldName)
                            .numericValue().intValue());
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #14
Source File: PositionStoringLuceneIndexCreatorTest.java    From Palmetto with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * End-to-end check of the position storing index: builds an index from {@code DOCUMENTS} in a
 * fresh temporary directory, creates the histogram for it, reads the positions of
 * {@code TEST_WORDS} back through a {@link WindowSupportingLuceneCorpusAdapter} and finally
 * compares the window based counts with {@code EXPECTED_COUNTS}.
 * <p>
 * NOTE(review): the temporary index directory is not removed by this method.
 */
@Test
public void test() throws CorruptIndexException, IOException {
    // The nano time suffix keeps the directory name unique between runs.
    File indexDir = new File(
            FileUtils.getTempDirectoryPath() + File.separator + "temp_index" + Long.toString(System.nanoTime()));
    Assert.assertTrue(indexDir.mkdir());
    Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator(
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));
    // create the histogram for the index that has just been written
    LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator(
            Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath());

    // test the created index
    // create an adapter
    WindowSupportingLuceneCorpusAdapter adapter = null;
    try {
        adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
                Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
        // query the test words
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS,
                docLengths);
        // compare the result with the expected counts
        int positionInDoc;
        IntArrayList[] positionsInDocs;
        // i is used as the document id, j as the index of the word inside TEST_WORDS
        for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) {
            positionsInDocs = wordPositions.get(i);
            for (int j = 0; j < positionsInDocs.length; ++j) {
                // a negative expected position marks a word that must not occur in the document
                if (EXPECTED_WORD_POSITIONS[i][j] < 0) {
                    Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j]
                            + "\" shouldn't be found inside document " + i + ". But got a position list instead.",
                            positionsInDocs[j]);
                } else {
                    // every expected word has to occur exactly once, at the expected position
                    Assert.assertEquals(1, positionsInDocs[j].elementsCount);
                    positionInDoc = positionsInDocs[j].buffer[0];
                    Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i
                            + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position " + positionInDoc
                            + " form the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc);
                }
            }
        }

        // test the window based counting
        BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter,
                WINDOW_SIZE);
        CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS },
                new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0];
        Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts);
    } finally {
        // close the adapter even if one of the assertions above failed
        if (adapter != null) {
            adapter.close();
        }
    }
}
 
Example #15
Source File: BooleanSlidingWindowFrequencyDeterminerSumCreationTest.java    From Palmetto with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Stub implementation: always returns {@code null} and leaves {@code docLengths} untouched.
 * NOTE(review): presumably the enclosing test never requests word positions - confirm.
 */
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    return null;
}
 
Example #16
Source File: WindowSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Returns the positions of the given words inside the corpus.
 *
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            empty int int map in which the document lengths are inserted
 *            by this method. NOTE(review): presumably keyed by document
 *            id - confirm against the implementations
 * @return the positions of the given words inside the corpus.
 *         NOTE(review): presumably keyed by document id, with one position
 *         list per word in the order of {@code words} - confirm against the
 *         implementations
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);