Java Code Examples for com.carrotsearch.hppc.IntObjectOpenHashMap

The following examples show how to use com.carrotsearch.hppc.IntObjectOpenHashMap. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: jopenfst   Source File: AbstractSymbolTable.java    License: MIT License 5 votes vote down vote up
protected AbstractSymbolTable(SymbolTable copyFrom) {

    this.symbolToId = new ObjectIntOpenHashMap<>(copyFrom.size());
    this.idToSymbol = new IntObjectOpenHashMap<>(copyFrom.size());
    for (ObjectIntCursor<String> cursor : copyFrom) {
      symbolToId.put(cursor.key, cursor.value);
      idToSymbol.put(cursor.value, cursor.key);
    }
  }
 
Example 2
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    // The result has one similarity value per (segment, condition) pair,
    // so first count the total number of conditions across all segments.
    int pos = 0;
    for (int i = 0; i < subsetVectors.segments.length; ++i) {
        pos += subsetVectors.conditions[i].length;
    }
    double values[] = new double[pos];

    // Cache mapping a word-set bit mask to its vector. Single-word masks
    // (1 << i) are seeded with the raw word vectors; multi-word masks are
    // derived lazily via createVector() below.
    IntObjectOpenHashMap<double[]> vectorCache = new IntObjectOpenHashMap<double[]>();
    for (int i = 0; i < subsetVectors.vectors.length; ++i) {
        vectorCache.put(1 << i, subsetVectors.vectors[i]);
    }
    pos = 0;
    for (int i = 0; i < subsetVectors.segments.length; ++i) {
        // Fixed: replaced the fragile containsKey()/lget() pair with a single
        // get(). lget() silently relies on containsKey() being the immediately
        // preceding operation on the map; cached vectors are never null, so a
        // null return reliably signals a cache miss.
        double[] segmentVector = vectorCache.get(subsetVectors.segments[i]);
        if (segmentVector == null) {
            segmentVector = createVector(subsetVectors.segments[i], subsetVectors.vectors);
            vectorCache.put(subsetVectors.segments[i], segmentVector);
        }
        for (int j = 0; j < subsetVectors.conditions[i].length; ++j) {
            double[] conditionVector = vectorCache.get(subsetVectors.conditions[i][j]);
            if (conditionVector == null) {
                conditionVector = createVector(subsetVectors.conditions[i][j], subsetVectors.vectors);
                vectorCache.put(subsetVectors.conditions[i][j], conditionVector);
            }
            values[pos] = calculateSimilarity(segmentVector, conditionVector);
            ++pos;
        }
    }
    return values;
}
 
Example 3
@Override
public double[] calculateConfirmationValues(SubsetVectors subsetVectors) {
    double values[] = new double[subsetVectors.segments.length];

    // The centroid is the element-wise sum of all word vectors.
    // NOTE(review): the normalization by the vector count was deliberately
    // commented out in the original — presumably calculateSimilarity() is
    // scale-invariant so dividing would not change the result; confirm.
    double centroid[] = new double[subsetVectors.vectors[0].length];
    IntObjectOpenHashMap<double[]> vectorCache = new IntObjectOpenHashMap<double[]>();
    for (int i = 0; i < subsetVectors.vectors.length; ++i) {
        vectorCache.put(1 << i, subsetVectors.vectors[i]);
        for (int j = 0; j < centroid.length; j++) {
            centroid[j] += subsetVectors.vectors[i][j];
        }
    }
    for (int i = 0; i < subsetVectors.segments.length; ++i) {
        // Fixed: replaced the fragile containsKey()/lget() pair with a single
        // get(). lget() silently relies on containsKey() being the immediately
        // preceding operation on the map; cached vectors are never null, so a
        // null return reliably signals a cache miss.
        double[] segmentVector = vectorCache.get(subsetVectors.segments[i]);
        if (segmentVector == null) {
            segmentVector = createVector(subsetVectors.segments[i], subsetVectors.vectors);
            vectorCache.put(subsetVectors.segments[i], segmentVector);
        }
        values[i] = calculateSimilarity(segmentVector, centroid);
    }
    return values;
}
 
Example 4
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // Accumulates, per document id, an array with one position list per word.
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = new IntObjectOpenHashMap<IntArrayList[]>();
    final int wordCount = words.length;
    for (int wordId = 0; wordId < wordCount; ++wordId) {
        requestDocumentsWithWord(words[wordId], positionsInDocs, docLengths, wordId, wordCount);
    }
    return positionsInDocs;
}
 
Example 5
protected int[] determineCounts(String wordset[]) {
    // One counter per subset of the word set (bit-mask index), hence 2^n slots.
    int counts[] = new int[(1 << wordset.length)];
    IntArrayList positions[];
    IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = corpusAdapter.requestWordPositionsInDocuments(wordset,
            docLengths);
    // Iterate the HPPC map's internal slot arrays directly: keys[i] is only
    // valid where allocated[i] is true (avoids allocating cursor objects).
    for (int i = 0; i < positionsInDocs.keys.length; ++i) {
        if (positionsInDocs.allocated[i]) {
            // values is stored as Object[], so the slot must be cast back to
            // the IntArrayList[] that was inserted for this document.
            positions = ((IntArrayList[]) ((Object[]) positionsInDocs.values)[i]);
            addCountsFromDocument(positions, counts, docLengths.get(positionsInDocs.keys[i]));
        }
    }
    return counts;
}
 
Example 6
public void testSubsetCreator(int wordsetSize, Segmentator subsetCreator,
            int expectedSegments[], int expectedConditions[][]) {
        // Build the expected segment -> condition-set mapping from the raw arrays.
        IntObjectOpenHashMap<IntOpenHashSet> expectedMapping = new IntObjectOpenHashMap<IntOpenHashSet>();
        BitSet neededCounts = new BitSet();
        createSets(expectedSegments, expectedConditions, expectedMapping, neededCounts);

        // Ask the segmentator under test for its definition and compare it
        // against the expectation (the neededCounts comparison is disabled).
        SegmentationDefinition actualDefinition = subsetCreator.getSubsetDefinition(wordsetSize);
        compare(actualDefinition, expectedMapping);
    }
 
Example 7
/**
 * Compares an actual {@link SegmentationDefinition} against the expected
 * segment -> condition-set mapping, failing the surrounding test on any
 * mismatch in segments, condition counts, or condition membership.
 */
private void compare(SegmentationDefinition definition,
                     IntObjectOpenHashMap<IntOpenHashSet> segmentToConditionMapping) {
    IntOpenHashSet conditionSet;

    Assert.assertEquals(segmentToConditionMapping.size(),
            definition.segments.length);
    for (int i = 0; i < definition.segments.length; i++) {
        Assert.assertTrue("got unexpected segment "
                + definition.segments[i], segmentToConditionMapping
                .containsKey(definition.segments[i]));
        conditionSet = segmentToConditionMapping
                .get(definition.segments[i]);
        // Fixed: check the condition count once per segment, OUTSIDE the
        // condition loop. The original asserted it inside the loop, so a
        // segment with an (incorrectly) empty condition array was never
        // compared against a non-empty expected condition set.
        Assert.assertEquals(
                "expected " + conditionSet.size() + " conditions "
                        + conditionSet.toString()
                        + " for segment ["
                        + definition.segments[i] + "] but got " + definition.conditions[i].length + " "
                        + Arrays.toString(definition.conditions[i]) + ".",
                conditionSet.size(),
                definition.conditions[i].length);
        for (int j = 0; j < definition.conditions[i].length; ++j) {
            Assert.assertTrue("got unexpected condition "
                    + definition.conditions[i][j] + " for segment "
                    + definition.segments[i],
                    conditionSet.contains(definition.conditions[i][j]));
        }
    }

    // Assert.assertEquals(neededCounts, definition.neededCounts);
}
 
Example 8
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // Single-document test adapter: all positions are reported under doc id 0.
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        // Fixed: added the null guard present in the sibling implementations.
        // Without it a word with no recorded positions (null slot) throws a
        // NullPointerException; now its slot simply stays null.
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example 9
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // Single-document test adapter: everything is keyed by document id 0.
    final int wordCount = positions.length;
    IntArrayList[] perWordPositions = new IntArrayList[wordCount];
    for (int w = 0; w < wordCount; ++w) {
        // Words without recorded occurrences keep a null slot.
        if ((positions[w] != null) && (positions[w].length > 0)) {
            perWordPositions[w] = new IntArrayList();
            perWordPositions[w].add(positions[w]);
        }
    }
    IntObjectOpenHashMap<IntArrayList[]> result = new IntObjectOpenHashMap<IntArrayList[]>();
    result.put(0, perWordPositions);
    docLengths.put(0, docLength);
    return result;
}
 
Example 10
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // Single-document test adapter: all position lists live under doc id 0.
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if ((positions[i] == null) || (positions[i].length == 0)) {
            continue; // word i has no recorded occurrences; leave its slot null
        }
        IntArrayList wordPositionList = new IntArrayList();
        wordPositionList.add(positions[i]);
        positionsInDocument[i] = wordPositionList;
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example 11
Source Project: jopenfst   Source File: NShortestPaths.java    License: MIT License 4 votes vote down vote up
/**
 * Calculates the shortest distances from each state to the final.
 *
 * See: M. Mohri, "Semiring Framework and Algorithms for Shortest-Distance Problems", Journal of Automata, Languages
 * and Combinatorics, 7(3), pp. 321-350, 2002.
 *
 * @param fst the fst to calculate the shortest distances
 * @return the array containing the shortest distances
 */
private static double[] shortestDistance(Fst fst) {

  // Distances to the final state of fst equal distances from the start state
  // of the reversed machine, so the algorithm runs on the reverse.
  Fst reversed = Reverse.reverse(fst);

  // d[q]: tentative shortest distance to state q.
  // r[q]: weight accumulated at q since it was last dequeued (Mohri's "r").
  double[] d = new double[reversed.getStateCount()];
  double[] r = new double[reversed.getStateCount()];

  Semiring semiring = reversed.getSemiring();

  // Initialize all distances to the semiring's additive identity.
  for (int i = 0; i < d.length; i++) {
    d[i] = semiring.zero();
    r[i] = semiring.zero();
  }

  // stateMap recovers State objects by id; enqueuedStateIds tracks queue
  // membership so a state is never enqueued twice at the same time.
  IntObjectOpenHashMap<State> stateMap = new IntObjectOpenHashMap<>();
  Deque<Integer> queue = new LinkedList<>();
  IntOpenHashSet enqueuedStateIds = new IntOpenHashSet();

  queue.addLast(reversed.getStartState().getId());
  stateMap.put(reversed.getStartState().getId(), reversed.getStartState());

  // The source state starts at the multiplicative identity.
  d[reversed.getStartState().getId()] = semiring.one();
  r[reversed.getStartState().getId()] = semiring.one();

  while (!queue.isEmpty()) {
    int thisStateId = queue.removeFirst();
    enqueuedStateIds.remove(thisStateId);
    State thisState = stateMap.get(thisStateId);
    // Consume the residual weight accumulated for this state and reset it
    // before relaxing the outgoing arcs.
    double rnew = r[thisState.getId()];
    r[thisState.getId()] = semiring.zero();

    for (int i = 0; i < thisState.getArcCount(); i++) {
      Arc arc = thisState.getArc(i);
      State nextState = arc.getNextState();
      double dnext = d[arc.getNextState().getId()];
      double dnextnew = semiring.plus(dnext, semiring.times(rnew, arc.getWeight()));
      // Only propagate when the relaxation actually changed the estimate;
      // otherwise the target state need not be (re-)enqueued.
      if (dnext != dnextnew) {
        d[arc.getNextState().getId()] = dnextnew;
        r[arc.getNextState().getId()] = semiring.plus(r[arc.getNextState().getId()], semiring.times(rnew, arc.getWeight()));
        int nextStateId = nextState.getId();
        if (!enqueuedStateIds.contains(nextStateId)) {
          queue.addLast(nextStateId);
          enqueuedStateIds.add(nextStateId);
          stateMap.put(nextStateId, nextState);
        }
      }
    }
  }
  return d;
}
 
Example 12
Source Project: jopenfst   Source File: AbstractSymbolTable.java    License: MIT License 4 votes vote down vote up
protected AbstractSymbolTable() {
  // Start with empty forward (symbol -> id) and reverse (id -> symbol) maps.
  this.idToSymbol = new IntObjectOpenHashMap<>();
  this.symbolToId = new ObjectIntOpenHashMap<>();
}
 
Example 13
/**
 * Scans every index reader segment for the given word and records, for each
 * matching document, the word's positions in slot {@code wordId} of that
 * document's {@code IntArrayList[]} inside {@code positionsInDocs}. Document
 * lengths are added to {@code docLengths} the first time a document is seen.
 * IO errors are logged and swallowed (best-effort lookup).
 */
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    DocsAndPositionsEnum docPosEnum = null;
    Term term = new Term(fieldName, word);
    int localDocId,
            globalDocId,
            baseDocId;
    IntArrayList positions[];
    try {
        for (int i = 0; i < reader.length; i++) {
            docPosEnum = reader[i].termPositionsEnum(term);
            // Segment-local doc ids are offset by the segment's docBase to
            // obtain index-wide (global) document ids.
            baseDocId = contexts[i].docBase;
            if (docPosEnum != null) {
                while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    localDocId = docPosEnum.docID();
                    globalDocId = localDocId + baseDocId;
                    // if this is the first word and we found a new document
                    if (!positionsInDocs.containsKey(globalDocId)) {
                        positions = new IntArrayList[numberOfWords];
                        positionsInDocs.put(globalDocId, positions);
                    } else {
                        positions = positionsInDocs.get(globalDocId);
                    }
                    if (positions[wordId] == null) {
                        positions[wordId] = new IntArrayList();
                    }
                    // Go through the positions inside this document
                    for (int p = 0; p < docPosEnum.freq(); ++p) {
                        positions[wordId].add(docPosEnum.nextPosition());
                    }
                    if (!docLengths.containsKey(globalDocId)) {
                        // Get the length of the document
                        docLengths.put(globalDocId, reader[i].document(localDocId).getField(docLengthFieldName)
                                .numericValue().intValue());
                    }
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example 14
@Test
public void test() throws CorruptIndexException, IOException {
    // Create a unique temporary directory for the Lucene index.
    // NOTE(review): the directory is never deleted afterwards — consider
    // cleaning it up in a finally block or @After method.
    File indexDir = new File(
            FileUtils.getTempDirectoryPath() + File.separator + "temp_index" + Long.toString(System.nanoTime()));
    Assert.assertTrue(indexDir.mkdir());
    Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator(
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));
    LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator(
            Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath());

    // test the created index
    // create an adapter
    WindowSupportingLuceneCorpusAdapter adapter = null;
    try {
        adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
                Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
        // query the test words
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS,
                docLengths);
        // compare the result with the expected counts
        int positionInDoc;
        IntArrayList[] positionsInDocs;
        // i iterates over document ids, j over the queried test words;
        // a negative expected position encodes "word absent from document i".
        for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) {
            positionsInDocs = wordPositions.get(i);
            for (int j = 0; j < positionsInDocs.length; ++j) {
                if (EXPECTED_WORD_POSITIONS[i][j] < 0) {
                    Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j]
                            + "\" shouldn't be found inside document " + i + ". But got a position list instead.",
                            positionsInDocs[j]);
                } else {
                    // Each word is expected at most once per document, so the
                    // position list must contain exactly one entry.
                    Assert.assertEquals(1, positionsInDocs[j].elementsCount);
                    positionInDoc = positionsInDocs[j].buffer[0];
                    Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i
                            + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position " + positionInDoc
                            + " form the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc);
                }
            }
        }

        // test the window based counting
        BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter,
                WINDOW_SIZE);
        CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS },
                new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0];
        Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts);
    } finally {
        // Always release the adapter's index resources, even on failure.
        if (adapter != null) {
            adapter.close();
        }
    }
}
 
Example 15
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    // Stub implementation: this test double does not support position
    // queries and always returns null; callers must not rely on the result.
    return null;
}
 
Example 16
/**
 * Returns the positions of the given words inside the corpus.
 * 
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            an empty int-to-int map into which the implementation inserts
 *            the length of every matching document, keyed by document id
 * @return the positions of the given words inside the corpus, mapped from
 *         document id to an array holding one position list per word (a
 *         slot may be null if the word does not occur in that document)
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);