com.carrotsearch.hppc.ObjectObjectOpenHashMap Java Examples

The following examples show how to use com.carrotsearch.hppc.ObjectObjectOpenHashMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds one {@code CountedSubsets} per segmentation definition, based on
 * the word to document-set mapping filled by the corpus adapter.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is used together with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions providing segments and conditions
 * @return an array with one entry per element of {@code definitions}
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    // Register every distinct word exactly once with an empty document set.
    ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping = new ObjectObjectOpenHashMap<String, IntOpenHashSet>();
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (wordDocMapping.containsKey(word)) {
                continue;
            }
            wordDocMapping.put(word, new IntOpenHashSet());
        }
    }

    // Let the corpus adapter fill the sets prepared above.
    corpusAdapter.getDocumentsWithWordsAsSet(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int idx = 0; idx < result.length; ++idx) {
        SegmentationDefinition definition = definitions[idx];
        result[idx] = new CountedSubsets(definition.segments, definition.conditions,
                createCounts(createBitSets(wordDocMapping, wordsets[idx])));
    }
    return result;
}
 
Example #2
Source File: ListBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds one {@code CountedSubsets} per segmentation definition, based on
 * the word to document-list mapping filled by the corpus adapter.
 *
 * @param wordsets
 *            the word sets; {@code wordsets[i]} is used together with
 *            {@code definitions[i]}
 * @param definitions
 *            the segmentation definitions providing segments and conditions
 * @return an array with one entry per element of {@code definitions}
 */
public CountedSubsets[] determineCounts(String[][] wordsets,
        SegmentationDefinition[] definitions) {
    // Register every distinct word exactly once with an empty document list.
    ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping = new ObjectObjectOpenHashMap<String, IntArrayList>();
    for (String[] wordset : wordsets) {
        for (String word : wordset) {
            if (wordDocMapping.containsKey(word)) {
                continue;
            }
            wordDocMapping.put(word, new IntArrayList());
        }
    }

    // Let the corpus adapter fill the lists prepared above.
    corpusAdapter.getDocumentsWithWords(wordDocMapping);

    CountedSubsets[] result = new CountedSubsets[definitions.length];
    for (int idx = 0; idx < result.length; ++idx) {
        int[] subsetCounts = createCounts(wordDocMapping, wordsets[idx]);
        addCountsOfSubsets(subsetCounts);
        result[idx] = new CountedSubsets(definitions[idx].segments,
                definitions[idx].conditions, subsetCounts);
    }
    return result;
}
 
Example #3
Source File: SpotlightClient.java    From gerbil with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a client for the given service URL and annotator and fills the
 * type prefix to URI mapping from {@code TYPE_PREFIX_URI_MAPPING}.
 *
 * @param serviceURL
 *            the URL of the service; a trailing slash is appended if it is
 *            missing
 * @param annotator
 *            the annotator stored in this client
 */
public SpotlightClient(String serviceURL, SpotlightAnnotator annotator) {
    // Make sure the stored URL always ends with a slash.
    if (serviceURL.endsWith("/")) {
        this.serviceURL = serviceURL;
    } else {
        this.serviceURL = serviceURL + "/";
    }
    this.annotator = annotator;

    typePrefixToUriMapping = new ObjectObjectOpenHashMap<String, String>();
    for (String[] prefixAndUri : TYPE_PREFIX_URI_MAPPING) {
        typePrefixToUriMapping.put(prefixAndUri[0], prefixAndUri[1]);
    }
}
 
Example #4
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Calls {@code getDocumentsWithWordAsSet} for every key/value pair stored
 * in the given map by walking the map's internal slot arrays directly.
 *
 * @param wordDocMapping
 *            the map whose words (keys) and document sets (values) are
 *            handed on pair by pair
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    final boolean[] occupied = wordDocMapping.allocated;
    for (int slot = 0; slot < occupied.length; ++slot) {
        // Only slots marked as allocated hold a key/value pair.
        if (!occupied[slot]) {
            continue;
        }
        getDocumentsWithWordAsSet((String) words[slot], (IntOpenHashSet) documentSets[slot]);
    }
}
 
Example #5
Source File: LuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Calls {@code getDocumentsWithWord} for every key/value pair stored in the
 * given map by walking the map's internal slot arrays directly.
 *
 * @param wordDocMapping
 *            the map whose words (keys) and document lists (values) are
 *            handed on pair by pair
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentLists = (Object[]) wordDocMapping.values;
    final boolean[] occupied = wordDocMapping.allocated;
    for (int slot = 0; slot < occupied.length; ++slot) {
        // Only slots marked as allocated hold a key/value pair.
        if (!occupied[slot]) {
            continue;
        }
        getDocumentsWithWord((String) words[slot], (IntArrayList) documentLists[slot]);
    }
}
 
Example #6
Source File: BitSetBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the document set of every word of the given word set, merges
 * all of them into one combined set and delegates to
 * {@code createBitSets(IntOpenHashSet[], IntOpenHashSet)}.
 *
 * @param wordDocMapping
 *            the mapping from words to their document sets
 * @param wordset
 *            the words whose document sets are looked up
 * @return the bit sets created by the delegate
 */
private BitSet[] createBitSets(
        ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping,
        String[] wordset) {
    final int wordCount = wordset.length;
    IntOpenHashSet[] documentsPerWord = new IntOpenHashSet[wordCount];
    IntOpenHashSet allDocuments = new IntOpenHashSet();
    for (int w = 0; w < wordCount; ++w) {
        IntOpenHashSet documents = wordDocMapping.get(wordset[w]);
        documentsPerWord[w] = documents;
        allDocuments.addAll(documents);
    }
    return createBitSets(documentsPerWord, allDocuments);
}
 
Example #7
Source File: ListBasedBooleanDocumentFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Counts document ids per word combination by merging the document lists
 * of the given words.
 * <p>
 * The returned array is indexed by a bit mask ("signature"): bit {@code i}
 * of the index is set if the id was found at the head of the list of
 * {@code wordset[i]}. {@code counts[s]} is the number of merge steps that
 * produced signature {@code s}; {@code counts[0]} ends up as 0.
 * <p>
 * Side effect: the used part of every looked-up list's buffer is sorted in
 * place.
 * <p>
 * NOTE(review): the merge appears to assume that a list contains no
 * duplicate ids and no id equal to {@code Integer.MAX_VALUE} (used as the
 * "nothing left" sentinel) - confirm against the corpus adapter.
 *
 * @param wordDocMapping
 *            mapping from words to their document id lists; the lists of
 *            the words in {@code wordset} are dereferenced directly, so
 *            presumably every word must have an entry - verify against
 *            callers
 * @param wordset
 *            the words whose lists are merged; its length determines the
 *            size of the result ({@code 1 << wordset.length})
 * @return the counts indexed by signature
 */
private int[] createCounts(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping, String[] wordset) {
    int counts[] = new int[(1 << wordset.length)];
    IntArrayList wordDocuments[] = new IntArrayList[wordset.length];
    for (int i = 0; i < wordDocuments.length; ++i) {
        wordDocuments[i] = wordDocMapping.get(wordset[i]);
        // sort only the filled part of the backing buffer
        Arrays.sort(wordDocuments[i].buffer, 0, wordDocuments[i].elementsCount);
    }

    // read position inside each sorted list
    int posInList[] = new int[wordDocuments.length];
    int nextDocId;
    int documentSignature = 0;
    // The loop increments counts[documentSignature] before the first id has
    // been determined (signature is still 0); starting at -1 cancels that.
    counts[0] = -1;
    do {
        // count the signature determined by the previous pass
        ++counts[documentSignature];
        nextDocId = Integer.MAX_VALUE;
        // find the smallest id at the list heads and the lists sharing it
        for (int i = 0; i < wordDocuments.length; ++i) {
            if ((posInList[i] < wordDocuments[i].elementsCount)
                    && (wordDocuments[i].buffer[posInList[i]] <= nextDocId)) {
                if (wordDocuments[i].buffer[posInList[i]] < nextDocId) {
                    // strictly smaller id found: restart the signature
                    nextDocId = wordDocuments[i].buffer[posInList[i]];
                    documentSignature = 0;
                }
                documentSignature |= 1 << i;
            }
        }
        // advance every list that contributed to the current signature
        for (int i = 0; i < posInList.length; ++i) {
            if ((documentSignature & (1 << i)) > 0) {
                ++posInList[i];
            }
        }
        // nextDocId is still MAX_VALUE if no list had an element left
    } while (nextDocId != Integer.MAX_VALUE);
    return counts;
}
 
Example #8
Source File: SimpleLuceneIndexCreatorTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates an index from {@code DOCUMENTS} in a temporary directory, queries
 * it for {@code TEST_WORDS} through a {@code LuceneCorpusAdapter} and
 * compares the number of retrieved documents per word with
 * {@code EXPECTED_DOC_COUNTS}.
 */
@Test
public void test() throws CorruptIndexException, IOException {
    // Step 1: create the index.
    File indexDir = createTempDirectory();
    Iterator<String> documents = Arrays.asList(DOCUMENTS).iterator();
    SimpleLuceneIndexCreator creator = new SimpleLuceneIndexCreator(Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME);
    boolean indexCreated = creator.createIndex(indexDir, documents);
    Assert.assertTrue(indexCreated);

    // Step 2: open the created index through an adapter.
    LuceneCorpusAdapter adapter = LuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME);

    // Step 3: query all test words at once.
    ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping = new ObjectObjectOpenHashMap<String, IntArrayList>();
    for (String word : TEST_WORDS) {
        wordDocMapping.put(word, new IntArrayList());
    }
    adapter.getDocumentsWithWords(wordDocMapping);

    // Step 4: check the number of documents retrieved for each word.
    for (int i = 0; i < TEST_WORDS.length; ++i) {
        int retrievedDocs = wordDocMapping.get(TEST_WORDS[i]).elementsCount;
        String message = "Expected " + EXPECTED_DOC_COUNTS[i] + " documents containing the word \""
                + TEST_WORDS[i] + "\", but got " + retrievedDocs + " documents form the index.";
        Assert.assertEquals(message, EXPECTED_DOC_COUNTS[i], retrievedDocs);
    }
}
 
Example #9
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test implementation: every key of the map is parsed as an integer index
 * into {@code wordDocuments}, and the entry found there is added to the set
 * mapped to that key.
 *
 * @param wordDocMapping
 *            the map whose value sets are extended
 */
@Override
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentSets = (Object[]) wordDocMapping.values;
    final boolean[] occupied = wordDocMapping.allocated;
    for (int slot = 0; slot < occupied.length; ++slot) {
        // Only slots marked as allocated hold a key/value pair.
        if (!occupied[slot]) {
            continue;
        }
        IntOpenHashSet target = (IntOpenHashSet) documentSets[slot];
        int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}
 
Example #10
Source File: AbstractBooleanDocumentSupportingAdapterBasedTest.java    From Palmetto with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test implementation: every key of the map is parsed as an integer index
 * into {@code wordDocuments}, and the entry found there is added to the
 * list mapped to that key.
 *
 * @param wordDocMapping
 *            the map whose value lists are extended
 */
@Override
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping) {
    final Object[] words = (Object[]) wordDocMapping.keys;
    final Object[] documentLists = (Object[]) wordDocMapping.values;
    final boolean[] occupied = wordDocMapping.allocated;
    for (int slot = 0; slot < occupied.length; ++slot) {
        // Only slots marked as allocated hold a key/value pair.
        if (!occupied[slot]) {
            continue;
        }
        IntArrayList target = (IntArrayList) documentLists[slot];
        int wordIndex = Integer.parseInt((String) words[slot]);
        target.add(wordDocuments[wordIndex]);
    }
}
 
Example #11
Source File: BooleanDocumentSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Determines the documents containing the words used as keys in the given
 * map. The resulting sets contain the ids of the documents and are stored
 * in the map as the values of the corresponding words.
 * <p>
 * NOTE(review): the implementations shown for this method add the ids to
 * the set that is already mapped to each key instead of putting new sets
 * into the map - confirm whether callers are required to pre-populate every
 * key with a (possibly empty) set.
 * 
 * @param wordDocMapping
 *            a mapping of words to sets of document ids in which the
 *            results are stored
 */
public void getDocumentsWithWordsAsSet(ObjectObjectOpenHashMap<String, IntOpenHashSet> wordDocMapping);
 
Example #12
Source File: BooleanDocumentSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Determines the documents containing the words used as keys in the given
 * map. The resulting int lists contain the ids of the documents and are
 * stored in the map as the values of the corresponding words.
 * <p>
 * NOTE(review): the implementations shown for this method add the ids to
 * the list that is already mapped to each key instead of putting new lists
 * into the map - confirm whether callers are required to pre-populate every
 * key with a (possibly empty) list.
 * 
 * @param wordDocMapping
 *            a mapping of words to lists of document ids in which the
 *            results are stored
 */
public void getDocumentsWithWords(ObjectObjectOpenHashMap<String, IntArrayList> wordDocMapping);