com.carrotsearch.hppc.cursors.LongCursor Java Examples

The following examples show how to use com.carrotsearch.hppc.cursors.LongCursor. Each example is taken from an open source project; the source file, originating project, and license are listed above the code.
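
Before diving into the examples, here is a minimal, self-contained sketch of the iteration pattern they all rely on. It assumes a recent HPPC release where the set class is named LongHashSet (older releases, used by some of the projects below, call it LongOpenHashSet); the class name LongCursorDemo is only for illustration.

import java.util.Iterator;

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor;

public class LongCursorDemo {
  public static void main(String[] args) {
    LongHashSet set = new LongHashSet();
    set.add(10L);
    set.add(20L);
    set.add(30L);

    // Enhanced for-loop: the set is Iterable<LongCursor>; each cursor exposes
    // the element in 'value' and its slot in the backing array in 'index'.
    for (LongCursor cursor : set) {
      System.out.println(cursor.index + " -> " + cursor.value);
    }

    // Explicit iterator, the form used by several of the examples below.
    Iterator<LongCursor> it = set.iterator();
    while (it.hasNext()) {
      System.out.println(it.next().value);
    }
  }
}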
Example #1
Source File: ExpandComponent.java    From lucene-solr with Apache License 2.0
private Query getGroupQuery(String fname,
                         FieldType ft,
                         int size,
                         LongHashSet groupSet) {

  BytesRef[] bytesRefs = new BytesRef[size];
  int index = -1;
  BytesRefBuilder term = new BytesRefBuilder();
  Iterator<LongCursor> it = groupSet.iterator();

  while (it.hasNext()) {
    LongCursor cursor = it.next();
    // Convert the numeric group value to its readable form, then to its indexed BytesRef.
    String stringVal = numericToString(ft, cursor.value);
    ft.readableToIndexed(stringVal, term);
    bytesRefs[++index] = term.toBytesRef();
  }

  return new TermInSetQuery(fname, bytesRefs);
}
 
Example #2
Source File: LongTermsSet.java    From siren-join with GNU Affero General Public License v3.0
/**
 * Serialize the list of terms to the {@link StreamOutput}.
 * <br>
 * Given the low performance of {@link org.elasticsearch.common.io.stream.BytesStreamOutput} when writing a large number
 * of longs (5 to 10 times slower than writing directly to a byte[]), we use a small 8kb buffer to optimise
 * throughput. 8kb appears to be the optimal buffer size; larger buffers did not improve throughput.
 *
 * @param out the output
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
  // Encode flag
  out.writeBoolean(this.isPruned());

  // Encode size of list
  out.writeInt(set.size());

  // Encode longs
  BytesRef buffer = new BytesRef(new byte[1024 * 8]);
  Iterator<LongCursor> it = set.iterator();
  while (it.hasNext()) {
    Bytes.writeLong(buffer, it.next().value);
    if (buffer.offset == buffer.length) {
      out.write(buffer.bytes, 0, buffer.offset);
      buffer.offset = 0;
    }
  }

  // flush the remaining bytes from the buffer
  out.write(buffer.bytes, 0, buffer.offset);
}
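
The buffering trick described in the Javadoc is not tied to Elasticsearch's StreamOutput. The sketch below shows the same idea against a plain java.io.OutputStream; the Bytes helper above is siren-join's own, so the hand-rolled big-endian encoding here is an assumption for illustration, not the project's exact wire format.

import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;

import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.LongCursor;

public final class BufferedLongWriter {

  // 8kb buffer, a multiple of 8, so an encoded long never straddles a flush.
  private static final int BUFFER_SIZE = 8 * 1024;

  public static void writeLongs(LongHashSet set, OutputStream out) throws IOException {
    byte[] buffer = new byte[BUFFER_SIZE];
    int offset = 0;
    Iterator<LongCursor> it = set.iterator();
    while (it.hasNext()) {
      long v = it.next().value;
      // Big-endian, 8 bytes per long (assumed encoding).
      for (int shift = 56; shift >= 0; shift -= 8) {
        buffer[offset++] = (byte) (v >>> shift);
      }
      if (offset == buffer.length) {
        // Buffer full: write it out and start refilling from the beginning.
        out.write(buffer, 0, offset);
        offset = 0;
      }
    }
    // Flush whatever is left in the buffer.
    out.write(buffer, 0, offset);
  }
}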
 
Example #3
Source File: ExpandComponent.java    From lucene-solr with Apache License 2.0
public NumericGroupExpandCollector(String field, long nullValue, LongHashSet groupSet, IntHashSet collapsedSet, int limit, Sort sort) throws IOException {
  int numGroups = collapsedSet.size();
  this.nullValue = nullValue;
  groups = new LongObjectHashMap<>(numGroups);
  for (LongCursor cursor : groupSet) {
    groups.put(cursor.value, getCollector(limit, sort));
  }

  this.field = field;
  this.collapsedSet = collapsedSet;
}
 
Example #4
Source File: ExpandComponent.java    From lucene-solr with Apache License 2.0
private Query getPointGroupQuery(SchemaField sf,
                                 int size,
                                 LongHashSet groupSet) {

  Iterator<LongCursor> it = groupSet.iterator();
  List<String> values = new ArrayList<>(size);
  FieldType ft = sf.getType();
  while (it.hasNext()) {
    LongCursor cursor = it.next();
    values.add(numericToString(ft, cursor.value));
  }

  return sf.getType().getSetQuery(null, sf, values);
}
 
Example #5
Source File: IntLongDynamicMap.java    From lucene-solr with Apache License 2.0
public void forEachValue(LongConsumer consumer) {
  if (keyValues != null) {
    for (long val : keyValues) {
      if (val != emptyValue) consumer.accept(val);
    }
  } else {
    for (LongCursor ord : hashMap.values()) {
      consumer.accept(ord.value);
    }
  }
}
 
Example #6
Source File: LongTermsSet.java    From siren-join with GNU Affero General Public License v3.0
@Override
public BytesRef writeToBytes() {
  long start = System.nanoTime();
  int size = set.size();

  BytesRef bytes = new BytesRef(new byte[HEADER_SIZE + 8 * size]);

  // Encode encoding type
  Bytes.writeInt(bytes, this.getEncoding().ordinal());

  // Encode flag
  bytes.bytes[bytes.offset++] = (byte) (this.isPruned() ? 1 : 0);

  // Encode size of the set
  Bytes.writeInt(bytes, size);

  // Encode longs
  for (LongCursor i : set) {
    Bytes.writeLong(bytes, i.value);
  }

  logger.debug("Serialized {} terms - took {} ms", this.size(), (System.nanoTime() - start) / 1000000);

  bytes.length = bytes.offset;
  bytes.offset = 0;
  return bytes;
}
 
Example #7
Source File: TokenTreeBuilder.java    From sasi with Apache License 2.0
public void add(SortedMap<Long, LongSet> data)
{
    for (Map.Entry<Long, LongSet> newEntry : data.entrySet())
    {
        LongSet found = tokens.get(newEntry.getKey());
        if (found == null)
            tokens.put(newEntry.getKey(), (found = new LongOpenHashSet(4)));

        for (LongCursor offset : newEntry.getValue())
            found.add(offset.value);
    }
}
 
Example #8
Source File: OnDiskIndexTest.java    From sasi with Apache License 2.0
private static Set<DecoratedKey> convert(TokenTreeBuilder offsets)
{
    Set<DecoratedKey> result = new HashSet<>();

    Iterator<Pair<Long, LongSet>> offsetIter = offsets.iterator();
    while (offsetIter.hasNext())
    {
        LongSet v = offsetIter.next().right;

        for (LongCursor offset : v)
            result.add(keyAt(offset.value));
    }
    return result;
}
 
Example #9
Source File: TokenTreeTest.java    From sasi with Apache License 2.0
@Override
public Iterator<DecoratedKey> iterator()
{
    List<DecoratedKey> keys = new ArrayList<>(offsets.size());
    for (LongCursor offset : offsets)
         keys.add(dk(offset.value));

    return keys.iterator();
}
 
Example #10
Source File: TokenTreeTest.java    From sasi with Apache License 2.0
private static Set<DecoratedKey> convert(LongSet offsets)
{
    Set<DecoratedKey> keys = new HashSet<>();
    for (LongCursor offset : offsets)
        keys.add(KEY_CONVERTER.apply(offset.value));

    return keys;
}
 
Example #11
Source File: SITransactor.java    From spliceengine with GNU Affero General Public License v3.0
private void resolveChildConflicts(Partition table,DataPut put,LongHashSet conflictingChildren) throws IOException{
    if(conflictingChildren!=null && !conflictingChildren.isEmpty()){
        DataDelete delete=opFactory.newDelete(put.key());
        Iterable<DataCell> cells=put.cells();
        // For each conflicting child id, delete the put's columns at that version,
        // along with the tombstone and commit-timestamp columns.
        for(LongCursor lc : conflictingChildren){
            for(DataCell dc : cells){
                delete.deleteColumn(dc.family(),dc.qualifier(),lc.value);
            }
            delete.deleteColumn(SIConstants.DEFAULT_FAMILY_BYTES,SIConstants.TOMBSTONE_COLUMN_BYTES,lc.value);
            delete.deleteColumn(SIConstants.DEFAULT_FAMILY_BYTES,SIConstants.COMMIT_TIMESTAMP_COLUMN_BYTES,lc.value);
        }
        delete.addAttribute(SIConstants.SUPPRESS_INDEXING_ATTRIBUTE_NAME,SIConstants.SUPPRESS_INDEXING_ATTRIBUTE_VALUE);
        table.delete(delete);
    }
}
 
Example #12
Source File: TermsByQueryActionTest.java    From siren-join with GNU Affero General Public License v3.0
/**
 * Tests the ordering by document score.
 */
@Test
public void testTermsByQueryWithLimitOrderByDocScore() throws Exception {
  // Enforce a single shard for the index: with multiple shards it is difficult
  // to avoid having one shard with fewer than 5 even ids (i.e., a shard that
  // would only return odd ids).
  Map<String, Object> indexSettings = new HashMap<>();
  indexSettings.put("number_of_shards", 1);
  assertAcked(prepareCreate("test").setSettings(indexSettings));

  int numDocs = RandomizedTest.randomIntBetween(100, 2000);
  logger.info("--> indexing [" + numDocs + "] docs");
  for (int i = 0; i < numDocs / 2; i += 2) {
    client().prepareIndex("test", "type", "" + i)
            .setSource(jsonBuilder().startObject()
            .field("int", i)
            .field("text", "aaa")
            .endObject())
            .execute().actionGet();
  }

  for (int i = 1; i < numDocs / 2; i += 2) {
    client().prepareIndex("test", "type", "" + i)
            .setSource(jsonBuilder().startObject()
            .field("int", i)
            .field("text", "aaa aaa")
            .endObject())
            .execute().actionGet();
  }

  client().admin().indices().prepareRefresh("test").execute().actionGet();

  logger.info("--> lookup terms in field [int]");
  TermsByQueryResponse resp = new TermsByQueryRequestBuilder(client(), TermsByQueryAction.INSTANCE).setIndices("test")
                                                                      .setField("int")
                                                                      .setQuery(QueryBuilders.termQuery("text", "aaa"))
                                                                      .setOrderBy(TermsByQueryRequest.Ordering.DOC_SCORE)
                                                                      .setMaxTermsPerShard(5)
                                                                      .setTermsEncoding(TermsByQueryRequest.TermsEncoding.LONG)
                                                                      .execute()
                                                                      .actionGet();

  int expectedMaxResultSize = this.getNumShards("test").totalNumShards * 5;
  ElasticsearchAssertions.assertNoFailures(resp);
  assertThat(resp.getEncodedTermsSet(), notNullValue());
  assertThat(resp.getSize(), lessThanOrEqualTo(expectedMaxResultSize));
  TermsSet lTerms = NumericTermsSet.readFrom(resp.getEncodedTermsSet());
  assertThat(lTerms instanceof LongTermsSet, is(true));

  // If the ordering by document score worked, we should only have documents with text = aaa (even ids), and no
  // documents with text = aaa aaa (odd ids), as the first one will be ranked higher.

  Iterator<LongCursor> it = ((LongTermsSet) lTerms).getLongHashSet().iterator();
  while (it.hasNext()) {
    long value = it.next().value;
    assertThat(value % 2 == 0, is(true));
  }
}
 
Example #13
Source File: TokenTreeBuilder.java    From sasi with Apache License 2.0
private void serializeOverflowCollisions(ByteBuffer buf)
{
    if (overflowCollisions != null)
        for (LongCursor offset : overflowCollisions)
            buf.putLong(offset.value);
}
 
Example #14
Source File: TokenTreeTest.java    From sasi with Apache License 2.0
@Test
public void buildWithMultipleMapsAndIterate() throws Exception
{
    final SortedMap<Long, LongSet> merged = new TreeMap<>();
    final TokenTreeBuilder builder = new TokenTreeBuilder(simpleTokenMap).finish();
    builder.add(collidingTokensMap);

    merged.putAll(collidingTokensMap);
    for (Map.Entry<Long, LongSet> entry : simpleTokenMap.entrySet())
    {
        if (merged.containsKey(entry.getKey()))
        {
            LongSet mergingOffsets  = entry.getValue();
            LongSet existingOffsets = merged.get(entry.getKey());

            if (mergingOffsets.equals(existingOffsets))
                continue;

            Set<Long> mergeSet = new HashSet<>();
            for (LongCursor merging : mergingOffsets)
                mergeSet.add(merging.value);

            for (LongCursor existing : existingOffsets)
                mergeSet.add(existing.value);

            LongSet mergedResults = new LongOpenHashSet();
            for (Long result : mergeSet)
                mergedResults.add(result);

            merged.put(entry.getKey(), mergedResults);
        }
        else
        {
            merged.put(entry.getKey(), entry.getValue());
        }
    }

    final Iterator<Pair<Long, LongSet>> tokenIterator = builder.iterator();
    final Iterator<Map.Entry<Long, LongSet>> listIterator = merged.entrySet().iterator();
    while (tokenIterator.hasNext() && listIterator.hasNext())
    {
        Pair<Long, LongSet> tokenNext = tokenIterator.next();
        Map.Entry<Long, LongSet> listNext = listIterator.next();

        Assert.assertEquals(listNext.getKey(), tokenNext.left);
        Assert.assertEquals(listNext.getValue(), tokenNext.right);
    }

    Assert.assertFalse("token iterator not finished", tokenIterator.hasNext());
    Assert.assertFalse("list iterator not finished", listIterator.hasNext());

}