Java Code Examples for org.apache.lucene.util.packed.PackedInts

The following examples show how to use org.apache.lucene.util.packed.PackedInts. They are extracted from open source projects; the source project and source file are listed above each example where available.
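All of these snippets go through a small part of the PackedInts API: PackedInts.bitsRequired computes the minimum number of bits needed to represent a value, PackedInts.getMutable allocates a writable packed array with a chosen bits-per-value and acceptable overhead ratio (COMPACT, DEFAULT or FASTEST), and the PackedLongValues builders accumulate longs into a compressed read-only structure. The short sketch below is not taken from any of the projects above; it only shows these core calls in isolation.

import org.apache.lucene.util.packed.PackedInts;

public class PackedIntsSketch {
  public static void main(String[] args) {
    // minimum number of bits needed to store values up to 1000
    int bitsPerValue = PackedInts.bitsRequired(1000); // 10

    // packed array with 100 slots, trading a little memory for faster access
    PackedInts.Mutable packed = PackedInts.getMutable(100, bitsPerValue, PackedInts.DEFAULT);
    packed.set(0, 42L);
    packed.set(1, 999L);

    System.out.println(packed.get(0));            // 42
    System.out.println(packed.getBitsPerValue()); // >= 10, depends on the format actually chosen
    System.out.println(packed.ramBytesUsed());    // estimated heap usage in bytes
  }
}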
Example 1
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    finishLeaf();

    context = ctx;
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        int lastDoc = 0;

        @Override
        public void collect(int doc, long bucket) throws IOException {
            docDeltas.add(doc - lastDoc);
            buckets.add(bucket);
            lastDoc = doc;
            maxBucket = Math.max(maxBucket, bucket);
        }
    };
}
 
Example 2
Source Project: Elasticsearch   Source File: PackedArrayIndexFieldData.java   License: Apache License 2.0
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
    int bitsRequired;
    long pageMemorySize = 0;
    PackedInts.FormatAndBits formatAndBits;
    if (pageMaxOrdinal == Long.MIN_VALUE) {
        // empty page - will use the null reader which just stores size
        pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);

    } else {
        long pageMinValue = values.get(pageMinOrdinal);
        long pageMaxValue = values.get(pageMaxOrdinal);
        long pageDelta = pageMaxValue - pageMinValue;
        if (pageDelta != 0) {
            bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
            formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
            pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
            pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
        } else {
// all values on this page are equal - estimated like an empty page (only the size is stored)
            pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
        }
    }
    return pageMemorySize;
}
 
Example 3
Source Project: lucene-solr   Source File: FixedGapTermsIndexWriter.java   License: Apache License 2.0
public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException {
  if (termIndexInterval <= 0) {
    throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval);
  }
  this.termIndexInterval = termIndexInterval;
  final String indexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
  out = state.directory.createOutput(indexFileName, state.context);
  boolean success = false;
  try {
    CodecUtil.writeIndexHeader(out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    out.writeVInt(termIndexInterval);
    out.writeVInt(PackedInts.VERSION_CURRENT);
    out.writeVInt(BLOCKSIZE);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(out);
    }
  }
}
 
Example 4
Source Project: lucene-solr   Source File: LZ4.java   License: Apache License 2.0
@Override
void reset(byte[] bytes, int off, int len) {
  Objects.checkFromIndexSize(off, len, bytes.length);
  this.bytes = bytes;
  this.base = off;
  this.lastOff = off - 1;
  this.end = off + len;
  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
  final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
  if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
    hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
  } else {
  // Avoid calling hashTable.clear(), which would otherwise make compressing many short sequences costly.
    // Instead, get() checks that references are less than the current offset.
    get(off); // this sets the hashTable for the first 4 bytes as a side-effect
  }
}
 
Example 5
Source Project: lucene-solr   Source File: TestLSBRadixSorter.java   License: Apache License 2.0
public void test(LSBRadixSorter sorter, int[] arr, int len) {
  final int[] expected = ArrayUtil.copyOfSubArray(arr, 0, len);
  Arrays.sort(expected);

  int numBits = 0;
  for (int i = 0; i < len; ++i) {
    numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
  }

  if (random().nextBoolean()) {
    numBits = TestUtil.nextInt(random(), numBits, 32);
  }

  sorter.sort(numBits, arr, len);
  final int[] actual = ArrayUtil.copyOfSubArray(arr, 0, len);
  assertArrayEquals(expected, actual);
}
 
Example 6
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);
  // write the ord -> byte[] as a binary field
  addBinaryField(field, values);
  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);
  
  // write the doc -> ord count as an absolute index to the stream
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}
 
Example 7
Source Project: Elasticsearch   Source File: HyperLogLogPlusPlus.java   License: Apache License 2.0
/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 */
public static int precisionFromThreshold(long count) {
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    int precision = PackedInts.bitsRequired(hashTableEntries * RamUsageEstimator.NUM_BYTES_INT);
    precision = Math.max(precision, MIN_PRECISION);
    precision = Math.min(precision, MAX_PRECISION);
    return precision;
}
 
Example 8
Source Project: Elasticsearch   Source File: OrdinalsBuilder.java   License: Apache License 2.0
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    int startBitsPerValue = 8;
    if (numTerms >= 0) {
        startBitsPerValue = PackedInts.bitsRequired(numTerms);
    }
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}
 
Example 9
Source Project: Elasticsearch   Source File: OrdinalsBuilder.java   License: Apache License 2.0
/**
 * Builds an {@link Ordinals} instance from the builder's current state.
 */
public Ordinals build(Settings settings) {
    final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
    final boolean forceMultiOrdinals = settings.getAsBoolean(FORCE_MULTI_ORDINALS, false);
    if (forceMultiOrdinals || numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getValueCount(), acceptableOverheadRatio)) {
        // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
        return new MultiOrdinals(this, acceptableOverheadRatio);
    } else {
        return new SinglePackedOrdinals(this, acceptableOverheadRatio);
    }
}
 
Example 10
Source Project: Elasticsearch   Source File: MultiOrdinals.java   License: Apache License 2.0
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
    int bitsPerOrd = PackedInts.bitsRequired(numOrds);
    bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
    // Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
    bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;

    final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
    return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
 
Example 11
Source Project: Elasticsearch   Source File: ParentChildIndexFieldData.java   License: Apache License 2.0
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
    final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length];
    for (int i = 0; i < ordinals.length; ++i) {
        ordinals[i] = atomicFD[i].getOrdinalsValues(parentType);
    }
    return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT);
}
 
Example 12
public void testDateCompression() throws IOException {
  try (final Directory dir = new ByteBuffersDirectory()) {
    final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    final IndexWriter iwriter = new IndexWriter(dir, iwc);

    final long base = 13; // prime
    final long day = 1000L * 60 * 60 * 24;

    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 300; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    for (int i = 0; i < 50; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new longs cost less than if they had only been packed
    assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
  }
}
 
Example 13
Source Project: lucene-solr   Source File: BinaryDocValuesWriter.java   License: Apache License 2.0
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.bytes = new PagedBytes(BLOCK_BITS);
  this.bytesOut = bytes.getDataOutput();
  this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  this.iwBytesUsed = iwBytesUsed;
  this.docsWithField = new DocsWithFieldSet();
  this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 14
Source Project: lucene-solr   Source File: SortedSetDocValuesWriter.java   License: Apache License 2.0
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 15
Source Project: lucene-solr   Source File: SortedDocValuesWriter.java   License: Apache License 2.0
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 16
Source Project: lucene-solr   Source File: MergeState.java   License: Apache License 2.0
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
  final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int del = 0;
  for (int i = 0; i < maxDoc; ++i) {
    docMapBuilder.add(i - del);
    if (liveDocs.get(i) == false) {
      ++del;
    }
  }
  return docMapBuilder.build();
}
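
Several of the writers above only fill a PackedLongValues.Builder; the values are read back elsewhere after build() is called. The following self-contained sketch (class name and sample values are made up, not taken from MergeState) shows one plausible round trip through the builder:

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedLongValuesSketch {
  public static void main(String[] args) {
    // monotonicBuilder suits non-decreasing sequences such as the doc map above
    PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    for (long i = 0; i < 1000; i++) {
      builder.add(i * 3); // values must be appended in index order
    }
    PackedLongValues values = builder.build(); // freeze into a read-only structure

    System.out.println(values.size());   // 1000
    System.out.println(values.get(500)); // 1500

    // sequential reads are cheapest through the iterator
    PackedLongValues.Iterator it = values.iterator();
    long sum = 0;
    while (it.hasNext()) {
      sum += it.next();
    }
    System.out.println(sum); // 1498500
  }
}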
 
Example 17
Source Project: lucene-solr   Source File: MultiDocValues.java   License: Apache License 2.0
/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedDocValues(String)}
 * </p>  
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedDocValues[] values = new SortedDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedDocValues v = context.reader().getSortedDocValues(field);
    if (v == null) {
      v = DocValues.emptySorted();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedDocValues(values, starts, mapping, totalCost);
  }
}
 
Example 18
Source Project: lucene-solr   Source File: MultiDocValues.java   License: Apache License 2.0
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedSetDocValues(String)}
 * </p>  
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedSetDocValues[] values = new SortedSetDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
    if (v == null) {
      v = DocValues.emptySortedSet();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedSetDocValues(values, starts, mapping, totalCost);
  }
}
 
Example 19
Source Project: lucene-solr   Source File: NumericDocValuesWriter.java   License: Apache License 2.0
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 20
Source Project: lucene-solr   Source File: IndexSorter.java   License: Apache License 2.0
@Override
public ComparableProvider[] getComparableProviders(List<? extends LeafReader> readers) throws IOException {
  final ComparableProvider[] providers = new ComparableProvider[readers.size()];
  final SortedDocValues[] values = new SortedDocValues[readers.size()];
  for(int i=0;i<readers.size();i++) {
    final SortedDocValues sorted = valuesProvider.get(readers.get(i));
    values[i] = sorted;
  }
  OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
  final int missingOrd;
  if (missingValue == SortField.STRING_LAST) {
    missingOrd = Integer.MAX_VALUE;
  } else {
    missingOrd = Integer.MIN_VALUE;
  }

  for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
    final SortedDocValues readerValues = values[readerIndex];
    final LongValues globalOrds = ordinalMap.getGlobalOrds(readerIndex);
    providers[readerIndex] = docID -> {
      if (readerValues.advanceExact(docID)) {
        // translate segment's ord to global ord space:
        return globalOrds.get(readerValues.ordValue());
      } else {
        return missingOrd;
      }
    };
  }
  return providers;
}
 
Example 21
Source Project: lucene-solr   Source File: NormValuesWriter.java   License: Apache License 2.0
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  docsWithField = new DocsWithFieldSet();
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 22
Source Project: lucene-solr   Source File: SortedNumericDocValuesWriter.java   License: Apache License 2.0
public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example 23
Source Project: lucene-solr   Source File: DocValuesFieldUpdates.java   License: Apache License 2.0
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
  this.maxDoc = maxDoc;
  this.delGen = delGen;
  this.field = field;
  if (type == null) {
    throw new NullPointerException("DocValuesType must not be null");
  }
  this.type = type;
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}
 
Example 24
Source Project: lucene-solr   Source File: NodeHash.java   License: Apache License 2.0
private void rehash() throws IOException {
  final PagedGrowableWriter oldTable = table;

  table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
  mask = table.size()-1;
  for(long idx=0;idx<oldTable.size();idx++) {
    final long address = oldTable.get(idx);
    if (address != 0) {
      addNew(address);
    }
  }
}
 
Example 25
Source Project: lucene-solr   Source File: TestTimSorterWorstCase.java   License: Apache License 2.0
/** Create an array for the given list of runs. */
private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
  PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
  int endRun = -1;
  for (long len : runs) {
    array.set(endRun += len, 1);
  }
  array.set(length - 1, 0);
  return array;
}
 
Example 26
Source Project: Elasticsearch   Source File: OrdinalsBuilder.java   License: Apache License 2.0
/**
 * Return a {@link org.apache.lucene.util.packed.PackedInts.Reader} instance mapping every doc ID to its first ordinal + 1 if it exists and 0 otherwise.
 */
public PackedInts.Reader getFirstOrdinals() {
    return ordinals.firstOrdinals;
}
 
Example 27
Source Project: Elasticsearch   Source File: ParentChildIndexFieldData.java   License: Apache License 2.0
TypeBuilder(float acceptableTransientOverheadRatio, LeafReader reader) throws IOException {
    bytes = new PagedBytes(15);
    termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
}
 
Example 28
Source Project: Elasticsearch   Source File: PackedArrayIndexFieldData.java   License: Apache License 2.0
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
                                                                 long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {

    CommonSettings.MemoryStorageFormat format;

    // estimate memory usage for a single packed array
    long packedDelta = maxValue - minValue + 1; // allow for a missing value
    // packedDelta can be negative if the difference between max and min values overflows the positive side of longs.
    int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
    PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
    final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;

    // ordinal memory usage
    final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();

    // estimate the memory signature of paged packing
    long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages
    int pageIndex = 0;
    long pageMinOrdinal = Long.MAX_VALUE;
    long pageMaxOrdinal = Long.MIN_VALUE;
    for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) {
        ordinals.setDocument(i);
        if (ordinals.cardinality() > 0) {
            long ordinal = ordinals.ordAt(0);
            pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal);
            pageMinOrdinal = Math.min(ordinal, pageMinOrdinal);
        }
        if (pageIndex == pageSize - 1) {
            // end of page, we now know enough to estimate memory usage
            pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);

            pageMinOrdinal = Long.MAX_VALUE;
            pageMaxOrdinal = Long.MIN_VALUE;
        }
    }

    if (pageIndex > 0) {
        // last page estimation
        pageIndex++;
        pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
    }

    if (ordinalsSize < singleValuesSize) {
        if (ordinalsSize < pagedSingleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.ORDINALS;
        } else {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        }
    } else {
        if (pagedSingleValuesSize < singleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        } else {
            format = CommonSettings.MemoryStorageFormat.PACKED;
        }
    }
    return format;
}
 
Example 29
Source Project: lucene-solr   Source File: TestJoinUtil.java   License: Apache License 2.0
public void testMinMaxDocs() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(
      random(),
      dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
  );

  int minChildDocsPerParent = 2;
  int maxChildDocsPerParent = 16;
  int numParents = RandomNumbers.randomIntBetween(random(), 16, 64);
  int[] childDocsPerParent = new int[numParents];
  for (int p = 0; p < numParents; p++) {
    String parentId = Integer.toString(p);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    iw.addDocument(parentDoc);
    int numChildren = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
    childDocsPerParent[p] = numChildren;
    for (int c = 0; c < numChildren; c++) {
      String childId = Integer.toString(p + c);
      Document childDoc = new Document();
      childDoc.add(new StringField("id", childId, Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      iw.addDocument(childDoc);
    }
  }
  iw.close();

  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
  SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
  for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
    values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
  }
  OrdinalMap ordinalMap = OrdinalMap.build(
      null, values, PackedInts.DEFAULT
  );
  Query fromQuery = new TermQuery(new Term("type", "from"));
  Query toQuery = new TermQuery(new Term("type", "to"));

  int iters = RandomNumbers.randomIntBetween(random(), 3, 9);
  for (int i = 1; i <= iters; i++) {
    final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
    int min = RandomNumbers.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
    int max = RandomNumbers.randomIntBetween(random(), min, maxChildDocsPerParent);
    if (VERBOSE) {
      System.out.println("iter=" + i);
      System.out.println("scoreMode=" + scoreMode);
      System.out.println("min=" + min);
      System.out.println("max=" + max);
    }
    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(joinQuery, collector);
    int expectedCount = 0;
    for (int numChildDocs : childDocsPerParent) {
      if (numChildDocs >= min && numChildDocs <= max) {
        expectedCount++;
      }
    }
    assertEquals(expectedCount, collector.getTotalHits());
  }

  searcher.getIndexReader().close();
  dir.close();
}
 
Example 30
Source Project: lucene-solr   Source File: LegacyFieldsIndexReader.java   License: Apache License 2.0
LegacyFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
  maxDoc = si.maxDoc();
  int[] docBases = new int[16];
  long[] startPointers = new long[16];
  int[] avgChunkDocs = new int[16];
  long[] avgChunkSizes = new long[16];
  PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
  PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

  final int packedIntsVersion = fieldsIndexIn.readVInt();

  int blockCount = 0;

  for (;;) {
    final int numChunks = fieldsIndexIn.readVInt();
    if (numChunks == 0) {
      break;
    }
    if (blockCount == docBases.length) {
      final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
      docBases = ArrayUtil.growExact(docBases, newSize);
      startPointers = ArrayUtil.growExact(startPointers, newSize);
      avgChunkDocs = ArrayUtil.growExact(avgChunkDocs, newSize);
      avgChunkSizes = ArrayUtil.growExact(avgChunkSizes, newSize);
      docBasesDeltas = ArrayUtil.growExact(docBasesDeltas, newSize);
      startPointersDeltas = ArrayUtil.growExact(startPointersDeltas, newSize);
    }

    // doc bases
    docBases[blockCount] = fieldsIndexIn.readVInt();
    avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
    final int bitsPerDocBase = fieldsIndexIn.readVInt();
    if (bitsPerDocBase > 32) {
      throw new CorruptIndexException("Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
    }
    docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

    // start pointers
    startPointers[blockCount] = fieldsIndexIn.readVLong();
    avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
    final int bitsPerStartPointer = fieldsIndexIn.readVInt();
    if (bitsPerStartPointer > 64) {
      throw new CorruptIndexException("Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
    }
    startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

    ++blockCount;
  }

  this.docBases = ArrayUtil.copyOfSubArray(docBases, 0, blockCount);
  this.startPointers = ArrayUtil.copyOfSubArray(startPointers, 0, blockCount);
  this.avgChunkDocs = ArrayUtil.copyOfSubArray(avgChunkDocs, 0, blockCount);
  this.avgChunkSizes = ArrayUtil.copyOfSubArray(avgChunkSizes, 0, blockCount);
  this.docBasesDeltas = ArrayUtil.copyOfSubArray(docBasesDeltas, 0, blockCount);
  this.startPointersDeltas = ArrayUtil.copyOfSubArray(startPointersDeltas, 0, blockCount);
}
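
Example 30 reads packed blocks that were written without a per-block header, so writer and reader must agree on format, version, value count and bits per value out of band. A rough round-trip sketch of that contract, assuming a recent Lucene version and using ByteBuffersDataOutput as an in-memory stream (class name and sample values are ours, not from the Lucene source), could look like this:

import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.packed.PackedInts;

public class NoHeaderRoundTrip {
  public static void main(String[] args) throws Exception {
    final int valueCount = 128;
    final int bitsPerValue = PackedInts.bitsRequired(127); // 7 bits is enough for 0..127

    // write: the stream contains only the packed bits, no metadata
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    PackedInts.Writer writer = PackedInts.getWriterNoHeader(
        out, PackedInts.Format.PACKED, valueCount, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
    for (int i = 0; i < valueCount; i++) {
      writer.add(i);
    }
    writer.finish();

    // read: the caller supplies the same format, version, count and width
    DataInput in = out.toDataInput();
    PackedInts.Reader reader = PackedInts.getReaderNoHeader(
        in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
    System.out.println(reader.get(42)); // 42
  }
}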