Java Code Examples for org.apache.lucene.util.packed.PackedInts#bitsRequired()

The following examples show how to use org.apache.lucene.util.packed.PackedInts#bitsRequired(). Each example is taken from an open-source project; the source file, project, and license are listed above it.
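Before looking at the project code, here is a minimal, self-contained sketch (not taken from any of the projects below) illustrating the method's contract: bitsRequired(long) returns the smallest number of bits that can hold the given non-negative value, and it rejects negative input with an IllegalArgumentException, which is why several examples below guard negative deltas and fall back to 64 bits.

import org.apache.lucene.util.packed.PackedInts;

public class BitsRequiredDemo {
    public static void main(String[] args) {
        System.out.println(PackedInts.bitsRequired(255L)); // 8: values in [0, 255] fit in 8 bits
        System.out.println(PackedInts.bitsRequired(256L)); // 9: 256 needs one more bit
        // PackedInts.bitsRequired(-1L) would throw IllegalArgumentException,
        // which is why Examples 1 and 8 below use 64 bits whenever a delta overflows to a negative value.
    }
}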
Example 1
Source File: PackedArrayIndexFieldData.java    From Elasticsearch with Apache License 2.0
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
    int bitsRequired;
    long pageMemorySize = 0;
    PackedInts.FormatAndBits formatAndBits;
    if (pageMaxOrdinal == Long.MIN_VALUE) {
        // empty page - will use the null reader which just stores size
        pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);

    } else {
        long pageMinValue = values.get(pageMinOrdinal);
        long pageMaxValue = values.get(pageMaxOrdinal);
        long pageDelta = pageMaxValue - pageMinValue;
        if (pageDelta != 0) {
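            // pageDelta can overflow to a negative value; bitsRequired rejects negative input, so fall back to 64 bits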
            bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
            formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
            pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
            pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
        } else {
            // all values on this page are equal - estimated like an empty page
            pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
        }
    }
    return pageMemorySize;
}
 
Example 2
Source File: LZ4.java    From lucene-solr with Apache License 2.0
@Override
void reset(byte[] bytes, int off, int len) {
  Objects.checkFromIndexSize(off, len, bytes.length);
  this.bytes = bytes;
  this.base = off;
  this.lastOff = off - 1;
  this.end = off + len;
  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
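  // ceil(log2(bitsPerOffset)): the exponent of the smallest power of two >= bitsPerOffset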
  final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
  if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
    hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
  } else {
    // Avoid calling hashTable.clear(): it would make compressing many short sequences costly.
    // Instead, get() checks that references are less than the current offset.
    get(off); // this sets the hashTable for the first 4 bytes as a side-effect
  }
}
 
Example 3
Source File: HyperLogLogPlusPlus.java    From Elasticsearch with Apache License 2.0
/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 */
public static int precisionFromThreshold(long count) {
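    // number of hash table entries needed to hold count distinct values at the maximum load factor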
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    int precision = PackedInts.bitsRequired(hashTableEntries * RamUsageEstimator.NUM_BYTES_INT);
    precision = Math.max(precision, MIN_PRECISION);
    precision = Math.min(precision, MAX_PRECISION);
    return precision;
}
 
Example 4
Source File: OrdinalsBuilder.java    From Elasticsearch with Apache License 2.0
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    int startBitsPerValue = 8;
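    // when the term count is known up front, size the initial bits per value for it; otherwise start at 8 bits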
    if (numTerms >= 0) {
        startBitsPerValue = PackedInts.bitsRequired(numTerms);
    }
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}
 
Example 5
Source File: MultiOrdinals.java    From Elasticsearch with Apache License 2.0
/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
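    // bits needed to encode any ordinal value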
    int bitsPerOrd = PackedInts.bitsRequired(numOrds);
    bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
    // Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
    bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;

    final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
    return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
 
Example 6
Source File: DocValuesFieldUpdates.java    From lucene-solr with Apache License 2.0
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
  this.maxDoc = maxDoc;
  this.delGen = delGen;
  this.field = field;
  if (type == null) {
    throw new NullPointerException("DocValuesType must not be null");
  }
  this.type = type;
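  // enough bits for the largest doc id (maxDoc - 1), plus SHIFT extra low-order bits (used to flag whether a doc has a value)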
  bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
  docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}
 
Example 7
Source File: NodeHash.java    From lucene-solr with Apache License 2.0
private void rehash() throws IOException {
  final PagedGrowableWriter oldTable = table;

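  // double the table; bitsRequired(count) is only a starting hint, since the PagedGrowableWriter grows as larger node addresses are stored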
  table = new PagedGrowableWriter(2*oldTable.size(), 1<<30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
  mask = table.size()-1;
  for(long idx=0;idx<oldTable.size();idx++) {
    final long address = oldTable.get(idx);
    if (address != 0) {
      addNew(address);
    }
  }
}
 
Example 8
Source File: PackedArrayIndexFieldData.java    From Elasticsearch with Apache License 2.0
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(LeafReader reader, PackedLongValues values, Ordinals build, RandomAccessOrds ordinals,
                                                                 long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {

    CommonSettings.MemoryStorageFormat format;

    // estimate memory usage for a single packed array
    long packedDelta = maxValue - minValue + 1; // allow for a missing value
    // packedDelta can be negative if the difference between max and min values overflows the positive range of long.
    int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
    PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
    final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;

    // ordinal memory usage
    final long ordinalsSize = build.ramBytesUsed() + values.ramBytesUsed();

    // estimate the memory signature of paged packing
    long pagedSingleValuesSize = (reader.maxDoc() / pageSize + 1) * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // array of pages
    int pageIndex = 0;
    long pageMinOrdinal = Long.MAX_VALUE;
    long pageMaxOrdinal = Long.MIN_VALUE;
    for (int i = 1; i < reader.maxDoc(); ++i, pageIndex = (pageIndex + 1) % pageSize) {
        ordinals.setDocument(i);
        if (ordinals.cardinality() > 0) {
            long ordinal = ordinals.ordAt(0);
            pageMaxOrdinal = Math.max(ordinal, pageMaxOrdinal);
            pageMinOrdinal = Math.min(ordinal, pageMinOrdinal);
        }
        if (pageIndex == pageSize - 1) {
            // end of page, we now know enough to estimate memory usage
            pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);

            pageMinOrdinal = Long.MAX_VALUE;
            pageMaxOrdinal = Long.MIN_VALUE;
        }
    }

    if (pageIndex > 0) {
        // last page estimation
        pageIndex++;
        pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
    }

    if (ordinalsSize < singleValuesSize) {
        if (ordinalsSize < pagedSingleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.ORDINALS;
        } else {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        }
    } else {
        if (pagedSingleValuesSize < singleValuesSize) {
            format = CommonSettings.MemoryStorageFormat.PAGED;
        } else {
            format = CommonSettings.MemoryStorageFormat.PACKED;
        }
    }
    return format;
}
 
Example 9
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  final int maxDoc = reader.maxDoc();

  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final PagedBytes bytes = new PagedBytes(15);

  int startTermsBPV;

  // TODO: use Uninvert?
  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > maxDoc) {
        throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead");
      }

      startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
    } else {
      startTermsBPV = 1;
    }
  } else {
    startTermsBPV = 1;
  }

  PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

  int termOrd = 0;

  // TODO: use Uninvert?

  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;

    while(true) {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      if (termOrd >= maxDoc) {
        throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead");
      }

      termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        // Store 1+ ord into packed bits
        docToTermOrd.set(docID, 1+termOrd);
      }
      termOrd++;
    }
  }

  // maybe an int-only impl?
  return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd);
}
 
Example 10
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  // TODO: would be nice to first check if DocTermsIndex
  // was already cached for this field and then return
  // that instead, to avoid insanity

  final int maxDoc = reader.maxDoc();
  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final int termCountHardLimit = maxDoc;

  // Holds the actual term data, expanded.
  final PagedBytes bytes = new PagedBytes(15);

  int startBPV;

  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        numUniqueTerms = termCountHardLimit;
      }
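      // assume roughly 4 bytes per term when guessing the initial offset width (offsets point into the PagedBytes buffer)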
      startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
    } else {
      startBPV = 1;
    }
  } else {
    startBPV = 1;
  }

  final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
  
  // pointer==0 means not set
  bytes.copyUsingLengthPrefix(new BytesRef());

  if (terms != null) {
    int termCount = 0;
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while(true) {
      if (termCount++ == termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        break;
      }

      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      final long pointer = bytes.copyUsingLengthPrefix(term);
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        docToOffset.set(docID, pointer);
      }
    }
  }

  final PackedInts.Reader offsetReader = docToOffset.getMutable();
  Bits docsWithField = new Bits() {
    @Override
    public boolean get(int index) {
      return offsetReader.get(index) != 0;
    }

    @Override
    public int length() {
      return maxDoc;
    }
  };

  wrapper.setDocsWithField(reader, key.field, docsWithField, null);
  // maybe an int-only impl?
  return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField);
}