com.carrotsearch.hppc.IntIntOpenHashMap Java Examples

The following examples show how to use com.carrotsearch.hppc.IntIntOpenHashMap, a hash map with primitive int keys and int values from the HPPC (High Performance Primitive Collections) library. The examples are taken from open-source projects; the source file, project, and license are noted above each example.
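Before the project examples, here is a minimal, self-contained usage sketch of the basic API. It is a sketch only, written against the older HPPC releases (0.5.x/0.6.x) used by the projects below, where the class is still called IntIntOpenHashMap (later HPPC versions renamed it to IntIntHashMap); get() returns 0 for keys that are not present.

import com.carrotsearch.hppc.IntIntOpenHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;

public class IntIntOpenHashMapExample {

    public static void main(String[] args) {
        IntIntOpenHashMap map = new IntIntOpenHashMap();

        map.put(1, 100);                      // insert key 1 -> 100
        map.put(2, 200);
        map.put(2, 250);                      // overwrite: put() returns the previous value

        int value = map.get(1);               // 100; get() returns 0 for absent keys
        boolean present = map.containsKey(3); // false

        map.remove(2);                        // delete the mapping for key 2

        // Iterate over all entries without boxing, via a reusable cursor
        for (IntIntCursor cursor : map) {
            System.out.println(cursor.key + " -> " + cursor.value);
        }

        System.out.println("size = " + map.size());
    }
}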
Example #1
Source File: SpanMergingEvaluatorDecorator.java    From gerbil with GNU Affero General Public License v3.0
@SuppressWarnings("unchecked")
protected List<T> merge(List<T> spans) {
    Span spanArray[] = spans.toArray(new Span[spans.size()]);
    Arrays.sort(spanArray, this);
    IntIntOpenHashMap enclosedByMap = new IntIntOpenHashMap();
    boolean isEnclosed;
    for (int i = 0; i < spanArray.length; ++i) {
        isEnclosed = false;
        for (int j = spanArray.length - 1; (j > i) && (!isEnclosed); --j) {
            // if spanArray[i] is enclosed by spanArray[j]
            if ((spanArray[i].getStartPosition() >= spanArray[j].getStartPosition())
                    && ((spanArray[i].getStartPosition() + spanArray[i].getLength()) <= (spanArray[j]
                            .getStartPosition() + spanArray[j].getLength()))) {
                enclosedByMap.put(i, j);
                isEnclosed = true;
            }
        }
    }
    // if no match could be found
    if (enclosedByMap.size() == 0) {
        return spans;
    }

    List<T> mergedMarkings = new ArrayList<T>(spans.size());
    // starting with the smallest span, check if a span is enclosed by
    // another
    int largerSpanId;
    for (int i = 0; i < spanArray.length; ++i) {
        if (enclosedByMap.containsKey(i)) {
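            // lget() returns the value for the key passed to the immediately preceding
            // containsKey() call, saving a second hash lookup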
            largerSpanId = enclosedByMap.lget();
            spanArray[largerSpanId] = merge(spanArray[i], spanArray[largerSpanId]);
        } else {
            mergedMarkings.add((T) spanArray[i]);
        }
    }
    return mergedMarkings;
}
 
Example #2
Source File: WindowSupportingLuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = new IntObjectOpenHashMap<IntArrayList[]>();
    for (int i = 0; i < words.length; ++i) {
        requestDocumentsWithWord(words[i], positionsInDocs, docLengths, i, words.length);
    }
    return positionsInDocs;
}
 
Example #3
Source File: AbstractWindowBasedFrequencyDeterminer.java    From Palmetto with GNU Affero General Public License v3.0
protected int[] determineCounts(String wordset[]) {
    int counts[] = new int[(1 << wordset.length)];
    IntArrayList positions[];
    IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs = corpusAdapter.requestWordPositionsInDocuments(wordset,
            docLengths);
    for (int i = 0; i < positionsInDocs.keys.length; ++i) {
        if (positionsInDocs.allocated[i]) {
            positions = ((IntArrayList[]) ((Object[]) positionsInDocs.values)[i]);
            addCountsFromDocument(positions, counts, docLengths.get(positionsInDocs.keys[i]));
        }
    }
    return counts;
}
 
Example #4
Source File: BooleanSlidingWindowProbabilitySupplierTest.java    From Palmetto with GNU Affero General Public License v3.0
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if (positions[i].length > 0) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example #5
Source File: ContextWindowFrequencyDeterminerCountingTest.java    From Palmetto with GNU Affero General Public License v3.0
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example #6
Source File: BooleanSlidingWindowFrequencyDeterminerCountingTest.java    From Palmetto with GNU Affero General Public License v3.0
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocuments = new IntObjectOpenHashMap<IntArrayList[]>();
    IntArrayList[] positionsInDocument = new IntArrayList[positions.length];
    for (int i = 0; i < positionsInDocument.length; ++i) {
        if ((positions[i] != null) && (positions[i].length > 0)) {
            positionsInDocument[i] = new IntArrayList();
            positionsInDocument[i].add(positions[i]);
        }
    }
    positionsInDocuments.put(0, positionsInDocument);
    docLengths.put(0, docLength);
    return positionsInDocuments;
}
 
Example #7
Source File: GeneralizationHierarchy.java    From arx with Apache License 2.0
/**
 * Throws an exception if the hierarchy is not monotonic.
 * 
 * @param manager
 */
public void checkMonotonicity(DataManager manager) {
    
    // Obtain dictionary
    String[] dictionary = null;
    String[] header = manager.getDataGeneralized().getHeader();
    for (int i=0; i<header.length; i++) {
        if (header[i].equals(attribute)) {
            dictionary = manager.getDataGeneralized().getDictionary().getMapping()[i];
        }
    }
    
    // Check
    if (dictionary==null) {
        throw new IllegalStateException("Cannot obtain dictionary for attribute ("+attribute+")");
    }
    
    // Level value -> level+1 value
    final IntIntOpenHashMap hMap = new IntIntOpenHashMap();
    
    // Input->level->output.
    for (int level = 0; level < (map[0].length - 1); level++) {
        hMap.clear();
        for (int i = 0; i < map.length; i++) {
            final int outputCurrentLevel = map[i][level];
            final int outputNextLevel = map[i][level + 1];
            if (hMap.containsKey(outputCurrentLevel)) {
                final int compare = hMap.get(outputCurrentLevel);
                if (compare != outputNextLevel) { 
                    String in = dictionary[outputCurrentLevel];
                    String out1 = dictionary[compare];
                    String out2 = dictionary[outputNextLevel];
                    throw new IllegalArgumentException("The transformation rule for the attribute '" + attribute + "' is not a hierarchy. ("+in+") can either be transformed to ("+out1+") or to ("+out2+")");
                }
            } else {
                hMap.put(outputCurrentLevel, outputNextLevel);
            }
        }
    }
}
 
Example #8
Source File: WindowSupportingLuceneCorpusAdapter.java    From Palmetto with GNU Affero General Public License v3.0
protected void requestDocumentsWithWord(String word, IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
        IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
    DocsAndPositionsEnum docPosEnum = null;
    Term term = new Term(fieldName, word);
    int localDocId, globalDocId, baseDocId;
    IntArrayList positions[];
    try {
        for (int i = 0; i < reader.length; i++) {
            docPosEnum = reader[i].termPositionsEnum(term);
            baseDocId = contexts[i].docBase;
            if (docPosEnum != null) {
                while (docPosEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    localDocId = docPosEnum.docID();
                    globalDocId = localDocId + baseDocId;
                    // if this is the first word and we found a new document
                    if (!positionsInDocs.containsKey(globalDocId)) {
                        positions = new IntArrayList[numberOfWords];
                        positionsInDocs.put(globalDocId, positions);
                    } else {
                        positions = positionsInDocs.get(globalDocId);
                    }
                    if (positions[wordId] == null) {
                        positions[wordId] = new IntArrayList();
                    }
                    // Go through the positions inside this document
                    for (int p = 0; p < docPosEnum.freq(); ++p) {
                        positions[wordId].add(docPosEnum.nextPosition());
                    }
                    if (!docLengths.containsKey(globalDocId)) {
                        // Get the length of the document
                        docLengths.put(globalDocId, reader[i].document(localDocId).getField(docLengthFieldName)
                                .numericValue().intValue());
                    }
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
    }
}
 
Example #9
Source File: PositionStoringLuceneIndexCreatorTest.java    From Palmetto with GNU Affero General Public License v3.0
@Test
public void test() throws CorruptIndexException, IOException {
    File indexDir = new File(
            FileUtils.getTempDirectoryPath() + File.separator + "temp_index" + Long.toString(System.nanoTime()));
    Assert.assertTrue(indexDir.mkdir());
    Iterator<IndexableDocument> docIterator = Arrays.asList(DOCUMENTS).iterator();
    // create the index
    PositionStoringLuceneIndexCreator creator = new PositionStoringLuceneIndexCreator(
            Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    Assert.assertTrue(creator.createIndex(indexDir, docIterator));
    LuceneIndexHistogramCreator hCreator = new LuceneIndexHistogramCreator(
            Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
    hCreator.createLuceneIndexHistogram(indexDir.getAbsolutePath());

    // test the created index
    // create an adapter
    WindowSupportingLuceneCorpusAdapter adapter = null;
    try {
        adapter = WindowSupportingLuceneCorpusAdapter.create(indexDir.getAbsolutePath(),
                Palmetto.DEFAULT_TEXT_INDEX_FIELD_NAME, Palmetto.DEFAULT_DOCUMENT_LENGTH_INDEX_FIELD_NAME);
        // query the test words
        IntIntOpenHashMap docLengths = new IntIntOpenHashMap();
        IntObjectOpenHashMap<IntArrayList[]> wordPositions = adapter.requestWordPositionsInDocuments(TEST_WORDS,
                docLengths);
        // compare the result with the expected counts
        int positionInDoc;
        IntArrayList[] positionsInDocs;
        for (int i = 0; i < EXPECTED_WORD_POSITIONS.length; ++i) {
            positionsInDocs = wordPositions.get(i);
            for (int j = 0; j < positionsInDocs.length; ++j) {
                if (EXPECTED_WORD_POSITIONS[i][j] < 0) {
                    Assert.assertNull("Expected null because the word \"" + TEST_WORDS[j]
                            + "\" shouldn't be found inside document " + i + ". But got a position list instead.",
                            positionsInDocs[j]);
                } else {
                    Assert.assertEquals(1, positionsInDocs[j].elementsCount);
                    positionInDoc = positionsInDocs[j].buffer[0];
                    Assert.assertEquals("Expected the word \"" + TEST_WORDS[j] + "\" in document " + i
                            + " at position " + EXPECTED_WORD_POSITIONS[i][j] + " but got position " + positionInDoc
                            + " form the index.", EXPECTED_WORD_POSITIONS[i][j], positionInDoc);
                }
            }
        }

        // test the window based counting
        BooleanSlidingWindowFrequencyDeterminer determiner = new BooleanSlidingWindowFrequencyDeterminer(adapter,
                WINDOW_SIZE);
        CountedSubsets subsets = determiner.determineCounts(new String[][] { TEST_WORDS },
                new SegmentationDefinition[] { new SegmentationDefinition(new int[0], new int[0][0], null) })[0];
        Assert.assertArrayEquals(EXPECTED_COUNTS, subsets.counts);
    } finally {
        if (adapter != null) {
            adapter.close();
        }
    }
}
 
Example #10
Source File: BooleanSlidingWindowFrequencyDeterminerSumCreationTest.java    From Palmetto with GNU Affero General Public License v3.0
@Override
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String[] words,
        IntIntOpenHashMap docLengths) {
    return null;
}
 
Example #11
Source File: MetricMDNUEntropyPrecomputed.java    From arx with Apache License 2.0
/**
 * Implements the score function described in Section 5.3 of the article
 * 
 * Bild R, Kuhn KA, Prasser F. SafePub: A Truthful Data Anonymization Algorithm With Strong Privacy Guarantees.
 * Proceedings on Privacy Enhancing Technologies. 2018(1):67-87.
 */
@Override
public ILScore getScore(final Transformation<?> node, final HashGroupify groupify) {
    
    if (k < 0) {
        throw new RuntimeException("Parameters required for differential privacy have not been initialized yet");
    }
    
    // Prepare
    int dimensionsGeneralized = getDimensionsGeneralized();
    IntIntOpenHashMap[] nonSuppressedValueToCount = new IntIntOpenHashMap[dimensionsGeneralized];
    for (int dimension=0; dimension<dimensionsGeneralized; dimension++) {
        nonSuppressedValueToCount[dimension] = new IntIntOpenHashMap();
    }

    // Compute score. The casts to long are required to avoid integer overflows
    // when large numbers are being multiplied.
    BigFraction score = BigFraction.ZERO;
    HashGroupifyEntry m = groupify.getFirstEquivalenceClass();
    while (m != null) {
        m.read();
        for (int dimension=0; dimension<dimensionsGeneralized; dimension++) {
            int value = m.next();
            // Process values of records which have not been suppressed by sampling
            if (m.isNotOutlier && (rootValues[dimension] == -1 || value != rootValues[dimension])) {
                // The attribute value has neither been suppressed because of record suppression nor because of generalization
                nonSuppressedValueToCount[dimension].putOrAdd(value, m.count, m.count);
            } else {
                // The attribute value has been suppressed because of record suppression or because of generalization
                score = score.add(new BigFraction((long)m.count * (long)rows));
            }
            // Add values for records which have been suppressed by sampling
            score = score.add(new BigFraction((long)(m.pcount - m.count) * (long)rows));
        }
        m = m.nextOrdered;
    }
    // Add values for all attribute values which were not suppressed
    for (int dimension=0; dimension<dimensionsGeneralized; dimension++) {
        final boolean [] states = nonSuppressedValueToCount[dimension].allocated;
        final int [] counts = nonSuppressedValueToCount[dimension].values;
        for (int i=0; i<states.length; i++) {
            if (states[i]) {
                score = score.add(new BigFraction((long)counts[i] * (long)counts[i]));
            }
        }
    }

    // Adjust sensitivity and multiply with -1 so that higher values are better
    score = score.multiply(BigFraction.MINUS_ONE.divide(new BigFraction(((long)rows * (long)dimensionsGeneralized))));
    score = score.divide((k == 1) ? new BigFraction(5) : new BigFraction(k * k).divide(new BigFraction(k - 1)).add(BigFraction.ONE));
    
    // Return score
    return new ILScore(score);
}
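This example (like #12 and #13 below) reads the map's public backing arrays directly instead of using a cursor: keys and values hold the entries, and allocated marks which slots are in use. The following is a minimal sketch of the same idiom; it assumes the pre-0.7 HPPC layout in which these fields are public, and it trades a dependency on the map's internal structure for avoiding per-entry cursor objects in hot loops.

IntIntOpenHashMap counts = new IntIntOpenHashMap();
counts.put(7, 3);
counts.put(42, 1);

final int[] keys = counts.keys;
final int[] values = counts.values;
final boolean[] allocated = counts.allocated;

int sum = 0;
for (int i = 0; i < allocated.length; i++) {
    if (allocated[i]) {        // slot i holds a live entry
        sum += values[i];      // keys[i] is the key stored in this slot
    }
}
// sum == 4

Unless profiling shows the cursor loop to be a bottleneck, the cursor-based iteration from the introductory sketch is the safer choice, since it does not depend on the map's internal slot layout.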
 
Example #12
Source File: RiskModelHistogram.java    From arx with Apache License 2.0
/**
 * Convert and analyze
 * 
 * @param grouped
 * @param stop
 * @param progress
 */
private void convertAndAnalyze(IntIntOpenHashMap grouped,
                               final WrappedBoolean stop,
                               final WrappedInteger progress) {

    // Convert
    int[][] temp = new int[grouped.size()][2];
    int idx = 0;
    final int[] values2 = grouped.values;
    final int[] keys2 = grouped.keys;
    final boolean[] states2 = grouped.allocated;
    for (int i = 0; i < states2.length; i++) {
        if (states2[i]) {
            temp[idx++] = new int[] { keys2[i], values2[i] };
        }
        if (stop.value) { throw new ComputationInterruptedException(); }
    }
    grouped = null;

    // Sort ascending by size
    Arrays.sort(temp, new Comparator<int[]>() {
        public int compare(int[] o1, int[] o2) {
            if (stop.value) { throw new ComputationInterruptedException(); }
            return Integer.compare(o1[0], o2[0]);
        }
    });

    // Convert and analyze
    int numClasses = 0;
    int numTuples = 0;
    this.equivalenceClasses = new int[temp.length * 2];
    idx = 0;
    for (int[] entry : temp) {
        this.equivalenceClasses[idx++] = entry[0];
        this.equivalenceClasses[idx++] = entry[1];
        numClasses += entry[1];
        numTuples += entry[0] * entry[1];
        if (stop.value) { throw new ComputationInterruptedException(); }
    }
    this.numRecords = numTuples;
    this.numClasses = numClasses;
    this.avgClassSize = this.numRecords / this.numClasses;
}
 
Example #13
Source File: ImportWizardPageCSV.java    From arx with Apache License 2.0
/**
 * Tries to detect the separator used within this file
 *
 * This goes through up to {@link ImportWizardModel#PREVIEW_MAX_LINES} lines
 * and tries to detect the used separator by counting how often each of
 * the available {@link #delimiters} is used.
 *
 * @throws IOException In case the file couldn't be accessed successfully
 */
private void detectDelimiter() throws IOException {
    Charset charset = getCharset();

    final BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(comboLocation.getText()), charset));
    final IntIntOpenHashMap map = new IntIntOpenHashMap();
    final CharIntOpenHashMap delimitors = new CharIntOpenHashMap();
    for (int i=0; i<this.delimiters.length; i++) {
        delimitors.put(this.delimiters[i], i);
    }
    int countLines = 0;
    int countChars = 0;

    /* Iterate over data */
    String line = r.readLine();
    outer: while ((countLines < ImportWizardModel.PREVIEW_MAX_LINES) && (line != null)) {

        /* Iterate over line character by character */
        final char[] a = line.toCharArray();
        for (final char c : a) {
            if (delimitors.containsKey(c)) {
                map.putOrAdd(delimitors.get(c), 0, 1);
            }
            countChars++;
            if (countChars > ImportWizardModel.DETECT_MAX_CHARS) {
                break outer;
            }
        }
        line = r.readLine();
        countLines++;
    }
    r.close();

    if (map.isEmpty()) {
        selectedDelimiter = 0;
        return;
    }

    /* Check which separator was used the most */
    int max = Integer.MIN_VALUE;
    final int [] keys = map.keys;
    final int [] values = map.values;
    final boolean [] allocated = map.allocated;
    for (int i = 0; i < allocated.length; i++) {
        if (allocated[i] && values[i] > max) {
            max = values[i];
            selectedDelimiter = keys[i];
        }
    }
}
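The delimiter detection above counts occurrences with putOrAdd(key, putValue, incrementValue): if the key is absent the map stores putValue, otherwise it adds incrementValue to the stored value and returns the result. A small counting sketch follows, under the same assumption about the method signature in the HPPC versions used here.

IntIntOpenHashMap histogram = new IntIntOpenHashMap();
int[] observations = { 4, 7, 4, 4, 9 };
for (int observation : observations) {
    // store 1 on the first occurrence, otherwise add 1 to the existing count
    histogram.putOrAdd(observation, 1, 1);
}
// histogram now maps 4 -> 3, 7 -> 1, 9 -> 1

Note that the wizard code above calls putOrAdd with a putValue of 0, so its stored counts are one less than the actual number of occurrences; since it only compares the counts to find the most frequent delimiter, that offset does not change the result.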
 
Example #14
Source File: WindowSupportingAdapter.java    From Palmetto with GNU Affero General Public License v3.0
/**
 * Returns the positions of the given words inside the corpus.
 * 
 * @param words
 *            the words for which the positions inside the documents should
 *            be determined
 * @param docLengths
 *            an empty int-int map into which the document lengths and counts
 *            are inserted
 * @return the positions of the given words inside the corpus
 */
public IntObjectOpenHashMap<IntArrayList[]> requestWordPositionsInDocuments(String words[],
        IntIntOpenHashMap docLengths);
 
Example #15
Source File: RiskModelHistogram.java    From arx with Apache License 2.0
/**
 * Creates a new instance from the given distribution.
 * IMPORTANT: Suppressed records should have been ignored before calling this.
 * 
 * @param distribution
 */
public RiskModelHistogram(final IntIntOpenHashMap distribution) {
    this.convertAndAnalyze(distribution,
                           new WrappedBoolean(),
                           new WrappedInteger());
}