org.apache.lucene.util.packed.PackedInts Java Examples

The following examples show how to use org.apache.lucene.util.packed.PackedInts. Each example notes its source file and the open-source project it comes from.
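
As a quick orientation before the examples: the two entry points that recur throughout are PackedInts.bitsRequired, which computes how many bits a value needs, and PackedInts.getMutable, which allocates a packed array with a chosen speed/space trade-off. A minimal, self-contained sketch (not taken from any of the projects below):

import org.apache.lucene.util.packed.PackedInts;

public class PackedIntsBasics {
  public static void main(String[] args) {
    // 4 bits are enough to store values up to 15
    int bitsPerValue = PackedInts.bitsRequired(15);

    // 100 slots of 4 bits each; COMPACT favors memory over speed,
    // while DEFAULT/FAST/FASTEST trade memory for faster reads
    PackedInts.Mutable packed = PackedInts.getMutable(100, bitsPerValue, PackedInts.COMPACT);

    packed.set(0, 7);
    packed.set(99, 15);
    System.out.println(packed.get(0));            // 7
    System.out.println(packed.getBitsPerValue()); // >= 4, depending on the chosen format
    System.out.println(packed.ramBytesUsed());    // estimated heap footprint
  }
}
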
Example #1
Source File: BestBucketsDeferringCollector.java    From Elasticsearch with Apache License 2.0
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    finishLeaf();

    context = ctx;
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        int lastDoc = 0;

        @Override
        public void collect(int doc, long bucket) throws IOException {
            docDeltas.add(doc - lastDoc); // store the delta from the previous doc so values pack into few bits
            buckets.add(bucket);
            lastDoc = doc;
            maxBucket = Math.max(maxBucket, bucket);
        }
    };
}
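
The collector above buffers doc-id deltas in a PackedLongValues builder so the stream can be replayed cheaply later. A standalone sketch of that build-then-iterate pattern (assuming only lucene-core on the classpath):

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class DeltaReplaySketch {
  public static void main(String[] args) {
    PackedLongValues.Builder deltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    int lastDoc = 0;
    for (int doc : new int[] {3, 7, 42}) { // doc IDs arrive in increasing order
      deltas.add(doc - lastDoc);           // small deltas pack into few bits
      lastDoc = doc;
    }
    PackedLongValues built = deltas.build();
    PackedLongValues.Iterator it = built.iterator();
    long doc = 0;
    while (it.hasNext()) {
      doc += it.next();                    // undo the delta encoding: prints 3, 7, 42
      System.out.println(doc);
    }
  }
}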
 
Example #2
Source File: DiskDocValuesConsumer.java    From incubator-retired-blur with Apache License 2.0
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);
  // write the ord -> byte[] as a binary field
  addBinaryField(field, values);
  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);
  
  // write the doc -> ord count as an absolute index into the stream
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}
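
The consumer above turns per-doc ord counts into a running total and hands it to a MonotonicBlockPackedWriter, which compresses monotonically increasing sequences. A minimal sketch of the same prefix-sum pattern writing into an in-memory buffer (ByteArrayDataOutput stands in for the real index output):

import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;

public class PrefixSumSketch {
  public static void main(String[] args) throws Exception {
    byte[] buffer = new byte[1024];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    // block size must be a power of two; 4096 matches typical codec usage
    MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(out, 4096);
    long addr = 0;
    for (long count : new long[] {2, 1, 3}) { // per-doc ord counts
      addr += count;                          // cumulative addresses: 2, 3, 6
      writer.add(addr);
    }
    writer.finish();
    System.out.println("bytes written: " + out.getPosition());
  }
}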
 
Example #3
Source File: TestLSBRadixSorter.java    From lucene-solr with Apache License 2.0
public void test(LSBRadixSorter sorter, int[] arr, int len) {
  final int[] expected = ArrayUtil.copyOfSubArray(arr, 0, len);
  Arrays.sort(expected);

  int numBits = 0;
  for (int i = 0; i < len; ++i) {
    numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
  }

  if (random().nextBoolean()) {
    numBits = TestUtil.nextInt(random(), numBits, 32);
  }

  sorter.sort(numBits, arr, len);
  final int[] actual = ArrayUtil.copyOfSubArray(arr, 0, len);
  assertArrayEquals(expected, actual);
}
 
Example #4
Source File: LZ4.java    From lucene-solr with Apache License 2.0
@Override
void reset(byte[] bytes, int off, int len) {
  Objects.checkFromIndexSize(off, len, bytes.length);
  this.bytes = bytes;
  this.base = off;
  this.lastOff = off - 1;
  this.end = off + len;
  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
  final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
  if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
    hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
  } else {
    // Avoid calling hashTable.clear(): doing so on every reset would make compressing many short sequences costly.
    // Instead, get() checks that references are less than the current offset.
    get(off); // this sets the hashTable for the first 4 bytes as a side-effect
  }
}
 
Example #5
Source File: FixedGapTermsIndexWriter.java    From lucene-solr with Apache License 2.0
public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException {
  if (termIndexInterval <= 0) {
    throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval);
  }
  this.termIndexInterval = termIndexInterval;
  final String indexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
  out = state.directory.createOutput(indexFileName, state.context);
  boolean success = false;
  try {
    CodecUtil.writeIndexHeader(out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    out.writeVInt(termIndexInterval);
    out.writeVInt(PackedInts.VERSION_CURRENT);
    out.writeVInt(BLOCKSIZE);
    success = true;
  } finally {
    if (!success) {
      IOUtils.closeWhileHandlingException(out);
    }
  }
}
 
Example #6
Source File: PackedArrayIndexFieldData.java    From Elasticsearch with Apache License 2.0
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
    int bitsRequired;
    long pageMemorySize = 0;
    PackedInts.FormatAndBits formatAndBits;
    if (pageMaxOrdinal == Long.MIN_VALUE) {
        // empty page - will use the null reader which just stores size
        pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);

    } else {
        long pageMinValue = values.get(pageMinOrdinal);
        long pageMaxValue = values.get(pageMaxOrdinal);
        long pageDelta = pageMaxValue - pageMinValue;
        if (pageDelta != 0) {
            bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
            formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
            pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
            pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
        } else {
            // empty page
            pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
        }
    }
    return pageMemorySize;
}
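
The sizing logic above relies on PackedInts.fastestFormatAndBits, which picks a storage format from a value count, the required bits, and how much memory overhead is acceptable. A small sketch (assuming only lucene-core) showing how the overhead ratio changes the decision:

import org.apache.lucene.util.packed.PackedInts;

public class FormatAndBitsSketch {
  public static void main(String[] args) {
    int valueCount = 1024;
    int bitsRequired = PackedInts.bitsRequired(1000); // 10 bits

    // COMPACT (no overhead allowed) keeps exactly 10 bits per value;
    // FASTEST may round up to a wider, faster-to-decode format
    for (float ratio : new float[] {PackedInts.COMPACT, PackedInts.DEFAULT, PackedInts.FASTEST}) {
      PackedInts.FormatAndBits fab = PackedInts.fastestFormatAndBits(valueCount, bitsRequired, ratio);
      long longs = fab.format.longCount(PackedInts.VERSION_CURRENT, valueCount, fab.bitsPerValue);
      System.out.println(ratio + " -> " + fab.format + " @ " + fab.bitsPerValue + " bits, " + longs + " longs");
    }
  }
}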
 
Example #7
Source File: SortedNumericDocValuesWriter.java    From lucene-solr with Apache License 2.0
public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #8
Source File: HyperLogLogPlusPlus.java    From Elasticsearch with Apache License 2.0
/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 */
public static int precisionFromThreshold(long count) {
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    int precision = PackedInts.bitsRequired(hashTableEntries * RamUsageEstimator.NUM_BYTES_INT);
    precision = Math.max(precision, MIN_PRECISION);
    precision = Math.min(precision, MAX_PRECISION);
    return precision;
}
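
To make the arithmetic concrete, here is the same computation with the steps worked out. The exact constants are Elasticsearch internals, so the values below (a 0.75 load factor, precision clamped to [4, 18]) are illustrative assumptions, not the library's definitive numbers:

import org.apache.lucene.util.packed.PackedInts;

public class PrecisionSketch {
  // Illustrative stand-ins for the Elasticsearch constants used above.
  static final double MAX_LOAD_FACTOR = 0.75;
  static final int MIN_PRECISION = 4;
  static final int MAX_PRECISION = 18;

  public static int precisionFromThreshold(long count) {
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    int precision = PackedInts.bitsRequired(hashTableEntries * Integer.BYTES);
    precision = Math.max(precision, MIN_PRECISION);
    precision = Math.min(precision, MAX_PRECISION);
    return precision;
  }

  public static void main(String[] args) {
    // count=1000 -> ceil(1000/0.75)=1334 entries -> 1334*4=5336 bytes
    // bitsRequired(5336)=13, already within [4,18] -> precision 13
    System.out.println(precisionFromThreshold(1000));
  }
}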
 
Example #9
Source File: TestTimSorterWorstCase.java    From lucene-solr with Apache License 2.0
/** Create an array for the given list of runs. */
private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
  PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
  int endRun = -1;
  for (long len : runs) {
    array.set(endRun += len, 1);
  }
  array.set(length - 1, 0);
  return array;
}
 
Example #10
Source File: OrdinalsBuilder.java    From Elasticsearch with Apache License 2.0
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    int startBitsPerValue = 8;
    if (numTerms >= 0) {
        startBitsPerValue = PackedInts.bitsRequired(numTerms);
    }
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}
 
Example #11
Source File: NodeHash.java    From lucene-solr with Apache License 2.0
private void rehash() throws IOException {
  final PagedGrowableWriter oldTable = table;

  table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
  mask = table.size()-1;
  for(long idx=0;idx<oldTable.size();idx++) {
    final long address = oldTable.get(idx);
    if (address != 0) {
      addNew(address);
    }
  }
}
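
PagedGrowableWriter, used above as an open-addressed hash table over long addresses, is a paged packed array whose per-page bit width grows on demand. A minimal sketch of the grow-and-copy move that rehash() performs, with sizes shrunk for illustration:

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedGrowableWriter;

public class RehashSketch {
  public static void main(String[] args) {
    // 16 slots, pages of 1<<10 entries, starting at 1 bit per value
    PagedGrowableWriter table = new PagedGrowableWriter(16, 1 << 10, 1, PackedInts.COMPACT);
    table.set(3, 12345);  // the page's bit width grows to fit the value

    // double the table and re-insert non-empty slots, as rehash() does
    PagedGrowableWriter bigger =
        new PagedGrowableWriter(2 * table.size(), 1 << 10, PackedInts.bitsRequired(12345), PackedInts.COMPACT);
    for (long idx = 0; idx < table.size(); idx++) {
      long address = table.get(idx);
      if (address != 0) {
        bigger.set(idx & (bigger.size() - 1), address); // toy re-slotting via the size mask
      }
    }
    System.out.println(bigger.get(3)); // 12345
  }
}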
 
Example #12
Source File: DocValuesFieldUpdates.java    From lucene-solr with Apache License 2.0
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
  this.maxDoc = maxDoc;
  this.delGen = delGen;
  this.field = field;
  if (type == null) {
    throw new NullPointerException("DocValuesType must not be null");
  }
  this.type = type;
  // the low SHIFT bits of each packed value mark whether the doc has a value; the high bits hold the doc id
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}
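
The `+ SHIFT` above reserves low-order bits next to the doc id inside a single packed value. A generic sketch of that pack/unpack trick in plain Java; the names here are chosen for illustration, not taken from Lucene:

public class PackDocFlagSketch {
  static final int SHIFT = 1;        // number of low bits reserved for flags
  static final long HAS_VALUE = 1L;  // illustrative flag kept in the low bit

  static long pack(int docId, boolean hasValue) {
    return ((long) docId << SHIFT) | (hasValue ? HAS_VALUE : 0L);
  }

  public static void main(String[] args) {
    long packed = pack(123456, true);
    int docId = (int) (packed >>> SHIFT);
    boolean hasValue = (packed & HAS_VALUE) != 0;
    System.out.println(docId + " hasValue=" + hasValue); // 123456 hasValue=true
  }
}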
 
Example #13
Source File: NormValuesWriter.java    From lucene-solr with Apache License 2.0
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  docsWithField = new DocsWithFieldSet();
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #14
Source File: IndexSorter.java    From lucene-solr with Apache License 2.0
@Override
public ComparableProvider[] getComparableProviders(List<? extends LeafReader> readers) throws IOException {
  final ComparableProvider[] providers = new ComparableProvider[readers.size()];
  final SortedDocValues[] values = new SortedDocValues[readers.size()];
  for(int i=0;i<readers.size();i++) {
    final SortedDocValues sorted = valuesProvider.get(readers.get(i));
    values[i] = sorted;
  }
  OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
  final int missingOrd;
  if (missingValue == SortField.STRING_LAST) {
    missingOrd = Integer.MAX_VALUE;
  } else {
    missingOrd = Integer.MIN_VALUE;
  }

  for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
    final SortedDocValues readerValues = values[readerIndex];
    final LongValues globalOrds = ordinalMap.getGlobalOrds(readerIndex);
    providers[readerIndex] = docID -> {
      if (readerValues.advanceExact(docID)) {
        // translate segment's ord to global ord space:
        return globalOrds.get(readerValues.ordValue());
      } else {
        return missingOrd;
      }
    };
  }
  return providers;
}
 
Example #15
Source File: NumericDocValuesWriter.java    From lucene-solr with Apache License 2.0
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #16
Source File: MultiDocValues.java    From lucene-solr with Apache License 2.0
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedSetDocValues(String)}
 * </p>  
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedSetDocValues[] values = new SortedSetDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
    if (v == null) {
      v = DocValues.emptySortedSet();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedSetDocValues(values, starts, mapping, totalCost);
  }
}
 
Example #17
Source File: MultiDocValues.java    From lucene-solr with Apache License 2.0
/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedDocValues(String)}
 * </p>  
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int size = leaves.size();
  
  if (size == 0) {
    return null;
  } else if (size == 1) {
    return leaves.get(0).reader().getSortedDocValues(field);
  }
  
  boolean anyReal = false;
  final SortedDocValues[] values = new SortedDocValues[size];
  final int[] starts = new int[size+1];
  long totalCost = 0;
  for (int i = 0; i < size; i++) {
    LeafReaderContext context = leaves.get(i);
    SortedDocValues v = context.reader().getSortedDocValues(field);
    if (v == null) {
      v = DocValues.emptySorted();
    } else {
      anyReal = true;
      totalCost += v.cost();
    }
    values[i] = v;
    starts[i] = context.docBase;
  }
  starts[size] = r.maxDoc();
  
  if (anyReal == false) {
    return null;
  } else {
    IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
    IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
    OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
    return new MultiSortedDocValues(values, starts, mapping, totalCost);
  }
}
 
Example #18
Source File: MultiOrdinals.java    From Elasticsearch with Apache License 2.0
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
    int bitsPerOrd = PackedInts.bitsRequired(numOrds);
    bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
    // Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
    bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;

    final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
    return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
 
Example #19
Source File: ParentChildIndexFieldData.java    From Elasticsearch with Apache License 2.0
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
    final SortedDocValues[] ordinals = new SortedDocValues[atomicFD.length];
    for (int i = 0; i < ordinals.length; ++i) {
        ordinals[i] = atomicFD[i].getOrdinalsValues(parentType);
    }
    return OrdinalMap.build(null, ordinals, PackedInts.DEFAULT);
}
 
Example #20
Source File: MergeState.java    From lucene-solr with Apache License 2.0
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
  final PackedLongValues.Builder docMapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int del = 0;
  for (int i = 0; i < maxDoc; ++i) {
    docMapBuilder.add(i - del);
    if (liveDocs.get(i) == false) {
      ++del;
    }
  }
  return docMapBuilder.build();
}
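
The resulting PackedLongValues is a doc map from old doc id to new doc id once deletions are squeezed out; because the mapping only ever increases, the monotonic builder encodes it very compactly. A small sketch of the same loop (FixedBitSet stands in for the merge-time live-docs Bits):

import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class DocMapSketch {
  public static void main(String[] args) {
    int maxDoc = 5;
    FixedBitSet liveDocs = new FixedBitSet(maxDoc);
    liveDocs.set(0, maxDoc); // all docs live...
    liveDocs.clear(2);       // ...except doc 2, which is deleted

    PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    int del = 0;
    for (int i = 0; i < maxDoc; ++i) {
      builder.add(i - del);
      if (liveDocs.get(i) == false) {
        ++del;
      }
    }
    PackedLongValues docMap = builder.build();
    // old doc 3 maps to new doc 2 once doc 2's deletion shifts it down
    System.out.println(docMap.get(3)); // 2
  }
}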
 
Example #21
Source File: OrdinalsBuilder.java    From Elasticsearch with Apache License 2.0
/**
 * Builds an {@link Ordinals} instance from the builders current state.
 */
public Ordinals build(Settings settings) {
    final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
    final boolean forceMultiOrdinals = settings.getAsBoolean(FORCE_MULTI_ORDINALS, false);
    if (forceMultiOrdinals || numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getValueCount(), acceptableOverheadRatio)) {
        // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
        return new MultiOrdinals(this, acceptableOverheadRatio);
    } else {
        return new SinglePackedOrdinals(this, acceptableOverheadRatio);
    }
}
 
Example #22
Source File: BaseCompressingDocValuesFormatTestCase.java    From lucene-solr with Apache License 2.0
public void testDateCompression() throws IOException {
  try (final Directory dir = new ByteBuffersDirectory()) {
    final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    final IndexWriter iwriter = new IndexWriter(dir, iwc);

    final long base = 13; // prime
    final long day = 1000L * 60 * 60 * 24;

    final Document doc = new Document();
    final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.add(dvf);
    for (int i = 0; i < 300; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size1 = dirSize(dir);
    for (int i = 0; i < 50; ++i) {
      dvf.setLongValue(base + random().nextInt(1000) * day);
      iwriter.addDocument(doc);
    }
    iwriter.forceMerge(1);
    final long size2 = dirSize(dir);
    // make sure the new longs cost less than if they had only been packed
    assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
  }
}
 
Example #23
Source File: BinaryDocValuesWriter.java    From lucene-solr with Apache License 2.0
public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.bytes = new PagedBytes(BLOCK_BITS);
  this.bytesOut = bytes.getDataOutput();
  this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  this.iwBytesUsed = iwBytesUsed;
  this.docsWithField = new DocsWithFieldSet();
  this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #24
Source File: SortedSetDocValuesWriter.java    From lucene-solr with Apache License 2.0
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #25
Source File: SortedDocValuesWriter.java    From lucene-solr with Apache License 2.0
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
 
Example #26
Source File: LegacyFieldsIndexReader.java    From lucene-solr with Apache License 2.0
LegacyFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
  maxDoc = si.maxDoc();
  int[] docBases = new int[16];
  long[] startPointers = new long[16];
  int[] avgChunkDocs = new int[16];
  long[] avgChunkSizes = new long[16];
  PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
  PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

  final int packedIntsVersion = fieldsIndexIn.readVInt();

  int blockCount = 0;

  for (;;) {
    final int numChunks = fieldsIndexIn.readVInt();
    if (numChunks == 0) {
      break;
    }
    if (blockCount == docBases.length) {
      final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
      docBases = ArrayUtil.growExact(docBases, newSize);
      startPointers = ArrayUtil.growExact(startPointers, newSize);
      avgChunkDocs = ArrayUtil.growExact(avgChunkDocs, newSize);
      avgChunkSizes = ArrayUtil.growExact(avgChunkSizes, newSize);
      docBasesDeltas = ArrayUtil.growExact(docBasesDeltas, newSize);
      startPointersDeltas = ArrayUtil.growExact(startPointersDeltas, newSize);
    }

    // doc bases
    docBases[blockCount] = fieldsIndexIn.readVInt();
    avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
    final int bitsPerDocBase = fieldsIndexIn.readVInt();
    if (bitsPerDocBase > 32) {
      throw new CorruptIndexException("Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
    }
    docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

    // start pointers
    startPointers[blockCount] = fieldsIndexIn.readVLong();
    avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
    final int bitsPerStartPointer = fieldsIndexIn.readVInt();
    if (bitsPerStartPointer > 64) {
      throw new CorruptIndexException("Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
    }
    startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

    ++blockCount;
  }

  this.docBases = ArrayUtil.copyOfSubArray(docBases, 0, blockCount);
  this.startPointers = ArrayUtil.copyOfSubArray(startPointers, 0, blockCount);
  this.avgChunkDocs = ArrayUtil.copyOfSubArray(avgChunkDocs, 0, blockCount);
  this.avgChunkSizes = ArrayUtil.copyOfSubArray(avgChunkSizes, 0, blockCount);
  this.docBasesDeltas = ArrayUtil.copyOfSubArray(docBasesDeltas, 0, blockCount);
  this.startPointersDeltas = ArrayUtil.copyOfSubArray(startPointersDeltas, 0, blockCount);
}
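
PackedInts.getReaderNoHeader, used above, deserializes a packed block whose metadata (version, value count, bits per value) is stored out of band, which is why the reader code must read those values itself first. A minimal round trip through an in-memory byte array, assuming only lucene-core:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.packed.PackedInts;

public class NoHeaderRoundTrip {
  public static void main(String[] args) throws Exception {
    int valueCount = 3;
    int bitsPerValue = PackedInts.bitsRequired(500); // 9 bits

    byte[] buffer = new byte[64];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    PackedInts.Writer writer = PackedInts.getWriterNoHeader(
        out, PackedInts.Format.PACKED, valueCount, bitsPerValue, PackedInts.DEFAULT_BUFFER_SIZE);
    for (long v : new long[] {7, 42, 500}) {
      writer.add(v);
    }
    writer.finish();

    // No header was written, so the caller must supply version, count and width.
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);
    PackedInts.Reader reader = PackedInts.getReaderNoHeader(
        in, PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
    System.out.println(reader.get(2)); // 500
  }
}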
 
Example #27
Source File: BinaryDocValuesFieldUpdates.java    From lucene-solr with Apache License 2.0
public BinaryDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
  super(maxDoc, delGen, field, DocValuesType.BINARY);
  offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  values = new BytesRefBuilder();
}
 
Example #28
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  // TODO: would be nice to first check if DocTermsIndex
  // was already cached for this field and then return
  // that instead, to avoid insanity

  final int maxDoc = reader.maxDoc();
  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final int termCountHardLimit = maxDoc;

  // Holds the actual term data, expanded.
  final PagedBytes bytes = new PagedBytes(15);

  int startBPV;

  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        numUniqueTerms = termCountHardLimit;
      }
      startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
    } else {
      startBPV = 1;
    }
  } else {
    startBPV = 1;
  }

  final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
  
  // pointer==0 means not set
  bytes.copyUsingLengthPrefix(new BytesRef());

  if (terms != null) {
    int termCount = 0;
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while(true) {
      if (termCount++ == termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        break;
      }

      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      final long pointer = bytes.copyUsingLengthPrefix(term);
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        docToOffset.set(docID, pointer);
      }
    }
  }

  final PackedInts.Reader offsetReader = docToOffset.getMutable();
  Bits docsWithField = new Bits() {
    @Override
    public boolean get(int index) {
      return offsetReader.get(index) != 0;
    }

    @Override
    public int length() {
      return maxDoc;
    }
  };

  wrapper.setDocsWithField(reader, key.field, docsWithField, null);
  // maybe an int-only impl?
  return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField);
}
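
GrowableWriter, used above for docToOffset, wraps a PackedInts.Mutable and transparently reallocates with more bits per value whenever a set() would overflow, which is why an underestimated startBPV is harmless. A tiny demonstration:

import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;

public class GrowableWriterSketch {
  public static void main(String[] args) {
    // start deliberately small: 1 bit per value for 100 slots
    GrowableWriter writer = new GrowableWriter(1, 100, PackedInts.DEFAULT);
    System.out.println(writer.getBitsPerValue()); // 1

    writer.set(5, 1_000_000); // forces reallocation to at least 20 bits per value
    System.out.println(writer.getBitsPerValue()); // >= 20
    System.out.println(writer.get(5));            // 1000000

    // unwrap the current backing array, as createValue() does above
    PackedInts.Reader reader = writer.getMutable();
    System.out.println(reader.get(5));            // 1000000
  }
}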
 
Example #29
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
public BinaryDocValues getTerms(LeafReader reader, String field) throws IOException {
  return getTerms(reader, field, PackedInts.FAST);
}
 
Example #30
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, Bits docsWithField) {
  this.bytes = bytes;
  this.docToOffset = docToOffset;
  this.docsWithField = docsWithField;
}