Java Code Examples for org.apache.arrow.vector.FieldVector#getDataBufferAddress()

The following examples show how to use org.apache.arrow.vector.FieldVector#getDataBufferAddress() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SumZeroAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();

  int incomingIndex = 0;
  for(long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    java.math.BigDecimal newVal = DecimalAccumulatorUtilsNoSpill.getBigDecimal(incomingValue + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    final long sumAddr = valueAddresses[tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK] + (tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK) * WIDTH_ACCUMULATOR;
    PlatformDependent.putLong(sumAddr, Double.doubleToLongBits(Double.longBitsToDouble(PlatformDependent.getLong(sumAddr)) + newVal.doubleValue() * bitVal));
  }
}
 
Example 2
Source File: MinAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();

  int incomingIndex = 0;
  for (long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    java.math.BigDecimal newVal = DecimalAccumulatorUtilsNoSpill.getBigDecimal(incomingValue + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    PlatformDependent.putLong(minAddr, Double.doubleToLongBits(min(Double.longBitsToDouble(PlatformDependent.getLong(minAddr)), newVal.doubleValue(), bitVal)));
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 3
Source File: SumZeroAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();

  int incomingIndex = 0;
  for(long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    // without if we would need to do a multiply which is slow.
    if (bitVal == 0) {
      continue;
    }
    java.math.BigDecimal newVal = DecimalAccumulatorUtilsNoSpill.getBigDecimal(incomingValue + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    final long sumAddr = valueAddresses[tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK] + (tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK) * WIDTH_ACCUMULATOR;
    BigDecimal curVal = DecimalUtils.getBigDecimalFromLEBytes(sumAddr, valBuf, scale);
    BigDecimal runningVal = curVal.add(newVal, DecimalUtils.MATH);
    byte [] valueInArrowBytes = DecimalUtils.convertBigDecimalToArrowByteArray(runningVal);
    PlatformDependent.copyMemory(valueInArrowBytes, 0, sumAddr, WIDTH_INPUT);
  }
}
 
Example 4
Source File: BaseSingleAccumulator.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
private void addBatchHelper(final ArrowBuf dataBuffer, final ArrowBuf validityBuffer) {
  resizeAttempted = true;
  /* the memory for accumulator vector comes from the operator's allocator that manages
   * memory for all data structures required for hash agg processing.
   * in other words, we should not use output.getAllocator() here as that allocator is
   * _only_ for for managing memory of output batch.
   */
  FieldVector vector = (FieldVector) output.getTransferPair(computationVectorAllocator).getTo();
  /* store the new vector and increment batches before allocating memory */
  accumulators[batches] = vector;
  final int oldBatches = batches;
  batches++;
  /* if this step or memory allocation inside any child of NestedAccumulator fails,
   * we have captured enough info to rollback the operation.
   */
  loadAccumulatorForNewBatch(vector, dataBuffer, validityBuffer);
  /* need to clear the data since allocate new doesn't do so and we want to start with clean memory */
  initialize(vector);
  bitAddresses[oldBatches] = vector.getValidityBufferAddress();
  valueAddresses[oldBatches] = vector.getDataBufferAddress();

  checkNotNull();
}
 
Example 5
Source File: MinAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;

  int incomingIndex = 0;
  for (long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    if (bitVal == 0) {
      continue;
    }
    /* get the corresponding data from input vector -- source data for accumulation */
    long addressOfInput = incomingValue + (incomingIndex * WIDTH_INPUT);
    long newValLow = PlatformDependent.getLong(addressOfInput);
    long newValHigh = PlatformDependent.getLong(addressOfInput + DecimalUtils.LENGTH_OF_LONG);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* Get current value and compare */
    long curValLow = PlatformDependent.getLong(minAddr);
    long curValHigh = PlatformDependent.getLong(minAddr + DecimalUtils.LENGTH_OF_LONG);
    int compare = DecimalUtils.compareDecimalsAsTwoLongs(newValHigh, newValLow, curValHigh, curValLow);

    if (compare < 0) {
      /* store the accumulated values(new min or existing) at the target location of accumulation vector */
      PlatformDependent.putLong(minAddr, newValLow);
      PlatformDependent.putLong(minAddr + DecimalUtils.LENGTH_OF_LONG, newValHigh);
      PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
    }
  }
}
 
Example 6
Source File: MaxAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxMemAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;

  int incomingIndex = 0;
  for(long ordinalAddr = memoryAddr; ordinalAddr < maxMemAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++){
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    if (bitVal == 0) {
      continue;
    }
    /* get the corresponding data from input vector -- source data for accumulation */
    long addressOfInput = incomingValue + (incomingIndex * WIDTH_INPUT);
    long newValLow = PlatformDependent.getLong(addressOfInput);
    long newValHigh = PlatformDependent.getLong(addressOfInput + DecimalUtils.LENGTH_OF_LONG);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;
    final long maxAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);

    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new max or existing) at the target location of accumulation vector */
    long curValLow = PlatformDependent.getLong(maxAddr);
    long curValHigh = PlatformDependent.getLong(maxAddr + DecimalUtils.LENGTH_OF_LONG);
    int compare = DecimalUtils.compareDecimalsAsTwoLongs(newValHigh, newValLow, curValHigh, curValLow);

    if (compare > 0) {
      /* store the accumulated values(new max or existing) at the target location of
      accumulation vector */
      PlatformDependent.putLong(maxAddr, newValLow);
      PlatformDependent.putLong(maxAddr + 8, newValHigh);
      PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
    }
  }
}
 
Example 7
Source File: SumZeroAccumulators.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    long addressOfInput = incomingValue + (incomingIndex * WIDTH_INPUT);
    long newValLow = PlatformDependent.getLong(addressOfInput) * bitVal;
    long newValHigh =
      PlatformDependent.getLong(addressOfInput + DecimalUtils.LENGTH_OF_LONG) * bitVal;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target address of accumulation vector */
    final long sumAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    /* store the accumulated values at the target location of accumulation vector */
    long curValLow = PlatformDependent.getLong(sumAddr);
    long curValHigh = PlatformDependent.getLong(sumAddr + DecimalUtils.LENGTH_OF_LONG);
    DecimalUtils.addSignedDecimals(sumAddr, newValLow, newValHigh, curValLow, curValHigh);
  }
}
 
Example 8
Source File: NdvAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
@Override
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final int scale = ((DecimalVector) inputVector).getScale();

  int incomingIndex = 0;
  for (long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    /* get the corresponding data from input vector -- source data for accumulation */
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    // without if we would need to do a multiply which is slow.
    if (bitVal == 0) {
      continue;
    }

    final ByteBuffer buffer = inputVector.getDataBuffer().nioBuffer(incomingIndex * WIDTH_INPUT, WIDTH_INPUT);

    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;

    final HllAccumHolder ah =  this.accumulators[chunkIndex];
    final HllSketch sketch = ah.getAccums()[chunkOffset];

    sketch.update(Memory.wrap(buffer), 0, WIDTH_INPUT);
  } //for
}
 
Example 9
Source File: MinAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;

  int incomingIndex = 0;
  for(long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++){
    final long newVal = PlatformDependent.getLong(incomingValue + (incomingIndex * WIDTH_INPUT));
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    // first 4 bytes are the number of days (in little endian, that's the bottom 32 bits)
    // second 4 bytes are the number of milliseconds (in little endian, that's the top 32 bits)
    final int newDays = (int) newVal;
    final int newMillis = (int)(newVal >>> 32);
    // To compare the pairs of day/milli, we swap them, with days getting the most significant bits
    // The incoming value is updated to either be MAX (if incoming is null), or keep as is (if the value is not null)
    final long newSwappedVal = ((((long)newDays) << 32) | newMillis) * bitVal + Long.MAX_VALUE * (bitVal ^ 1);
    final long minVal = PlatformDependent.getLong(minAddr);
    final int minDays = (int) minVal;
    final int minMillis = (int)(minVal >>> 32);
    final long minSwappedVal = (((long)minDays) << 32) | minMillis;
    PlatformDependent.putLong(minAddr, (minSwappedVal < newSwappedVal) ? minVal : newVal);
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 10
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    final float newVal = Float.intBitsToFloat(PlatformDependent.getInt(incomingValue + (incomingIndex * WIDTH_INPUT)));
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new min or existing) at the target location of accumulation vector */
    PlatformDependent.putInt(minAddr, Float.floatToIntBits(min(Float.intBitsToFloat(PlatformDependent.getInt(minAddr)), newVal, bitVal)));
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 11
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    java.math.BigDecimal newVal = DecimalUtils.getBigDecimalFromLEBytes(incomingValue + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new min or existing) at the target location of accumulation vector */
    PlatformDependent.putLong(minAddr, Double.doubleToLongBits(min(Double.longBitsToDouble(PlatformDependent.getLong(minAddr)), newVal.doubleValue(), bitVal)));
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 12
Source File: SumAccumulatorsNoSpill.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count) {
  final long maxAddr = memoryAddr + count * WIDTH_ORDINAL;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();

  int incomingIndex = 0;
  for (long ordinalAddr = memoryAddr; ordinalAddr < maxAddr; ordinalAddr += WIDTH_ORDINAL, incomingIndex++) {
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    // without if we would need to do a multiply which is slow.
    if (bitVal == 0) {
      continue;
    }
    java.math.BigDecimal newVal = DecimalAccumulatorUtilsNoSpill.getBigDecimal(incomingValue
      + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int tableIndex = PlatformDependent.getInt(ordinalAddr);
    int chunkIndex = tableIndex >>> LBlockHashTableNoSpill.BITS_IN_CHUNK;
    int chunkOffset = tableIndex & LBlockHashTableNoSpill.CHUNK_OFFSET_MASK;
    final long sumAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    BigDecimal curVal = DecimalUtils.getBigDecimalFromLEBytes(sumAddr, valBuf, scale);
    BigDecimal runningVal = curVal.add(newVal, DecimalUtils.MATH);
    byte [] valueInArrowBytes = DecimalUtils.convertBigDecimalToArrowByteArray(runningVal);
    PlatformDependent.copyMemory(valueInArrowBytes, 0, sumAddr, WIDTH_INPUT);
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 13
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    final double newVal = Double.longBitsToDouble(PlatformDependent.getLong(incomingValue + (incomingIndex * WIDTH_INPUT)));
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new min or existing) at the target location of accumulation vector */
    PlatformDependent.putLong(minAddr, Double.doubleToLongBits(min(Double.longBitsToDouble(PlatformDependent.getLong(minAddr)), newVal, bitVal)));
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 14
Source File: MaxAccumulators.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxMemAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int scale = ((DecimalVector)inputVector).getScale();
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxMemAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    java.math.BigDecimal newVal = DecimalUtils.getBigDecimalFromLEBytes(incomingValue + (incomingIndex * WIDTH_INPUT), valBuf, scale);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long maxAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new max or existing) at the target location of accumulation vector */
    PlatformDependent.putLong(maxAddr, Double.doubleToLongBits(max(Double.longBitsToDouble(PlatformDependent.getLong(maxAddr)), newVal.doubleValue(), bitVal)));
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 15
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  final long maxOrdinalAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;

  // Like every accumulator, the code below essentially implements:
  //   accumulators[ordinals[i]] += inputs[i]
  // with the only complication that both accumulators and inputs are bits.
  // There's nothing we can do about the locality of the accumulators, but inputs can be processed a word at a time.
  // Algorithm:
  // - get 64 bits worth of inputs, until all inputs exhausted. For each long:
  //   - find the accumulator word it pertains to
  //   - read/update/write the accumulator bit
  // Unfortunately, there is no locality: the incoming partition+ordinal array has been ordered by partition, which
  // removes the ability to process input bits a word at a time
  // In the code below:
  // - input* refers to the data values in the incoming batch
  // - ordinal* refers to the temporary table that hashAgg passes in, identifying which hash table entry each input matched to
  // - min* refers to the accumulator
  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxOrdinalAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    final long minBitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);  // 32-bit read-update-write
    // Update rules:
    // min of two boolean values boils down to doing a bitwise AND on the two
    // If the input bit is set, we update both the accumulator value and its bit
    //    -- the accumulator is AND-ed with the value of the input bit
    //    -- the accumulator bit is OR-ed with 1 (since the input is valid)
    // If the input bit is not set, we update neither the accumulator nor its bit
    //    -- the accumulator is AND-ed with a 1 (thus remaining unchanged)
    //    -- the accumulator bit is OR-ed with 0 (thus remaining unchanged)
    // Thus, the logical function for updating the accumulator is: oldVal AND (NOT(inputBit) OR inputValue)
    // Thus, the logical function for updating the accumulator is: oldBitVal OR inputBit
    final int inputBitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> BITS_PER_BYTE_SHIFT))) >>> (incomingIndex & (BITS_PER_BYTE - 1))) & 1;
    final int inputVal = (PlatformDependent.getByte(incomingValue + ((incomingIndex >>> BITS_PER_BYTE_SHIFT))) >>> (incomingIndex & (BITS_PER_BYTE - 1))) & 1;
    final int minBitUpdateVal = inputBitVal << (chunkOffset & 31);
    // NB: ~inputBitVal will set all the bits to 1, with only the LSB set to 0 or 1. Shifting left leaves zeroes
    // in the bottom (chunkOffset & 31) bits. They need to get set to 1s too
    int minUpdateVal = ((~inputBitVal) | inputVal) << (chunkOffset & 31); // see note above
    minUpdateVal = minUpdateVal | ((1 << (chunkOffset & 31)) - 1);
    final long minAddr = valueAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);  // 32-bit read-update-write
    PlatformDependent.putInt(minAddr, PlatformDependent.getInt(minAddr) & minUpdateVal);
    PlatformDependent.putInt(minBitUpdateAddr, PlatformDependent.getInt(minBitUpdateAddr) | minBitUpdateVal);
  }
}
 
Example 16
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    // no point continuing.
    if (bitVal == 0) {
      continue;
    }
    /* get the corresponding data from input vector -- source data for accumulation */
    long addressOfInput = incomingValue + (incomingIndex * WIDTH_INPUT);
    long newValLow = PlatformDependent.getLong(addressOfInput);
    long newValHigh = PlatformDependent.getLong(addressOfInput + DecimalUtils.LENGTH_OF_LONG);
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* Get current value and compare */
    long curValLow = PlatformDependent.getLong(minAddr);
    long curValHigh = PlatformDependent.getLong(minAddr + DecimalUtils.LENGTH_OF_LONG);
    int compare = DecimalUtils.compareDecimalsAsTwoLongs(newValHigh, newValLow, curValHigh,
      curValLow);

    if (compare < 0) {
      /* store the accumulated values(new min or existing) at the target location of accumulation vector */
      PlatformDependent.putLong(minAddr, newValLow);
      PlatformDependent.putLong(minAddr + DecimalUtils.LENGTH_OF_LONG, newValHigh);
      PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
    }

  }
}
 
Example 17
Source File: MaxAccumulators.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;

  final long maxOrdinalAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  // Like every accumulator, the code below essentially implements:
  //   accumulators[ordinals[i]] += inputs[i]
  // with the only complication that both accumulators and inputs are bits.
  // There's nothing we can do about the locality of the accumulators, but inputs can be processed a word at a time.
  // Algorithm:
  // - get 64 bits worth of inputs, until all inputs exhausted. For each long:
  //   - find the accumulator word it pertains to
  //   - read/update/write the accumulator bit
  // Unfortunately, there is no locality: the incoming partition+ordinal array has been ordered by partition, which
  // removes the ability to process input bits a word at a time
  // In the code below:
  // - input* refers to the data values in the incoming batch
  // - ordinal* refers to the temporary table that hashAgg passes in, identifying which hash table entry each input matched to
  // - min* refers to the accumulator
  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxOrdinalAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    final long maxBitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);  // 32-bit read-update-write
    // Update rules:
    // max of two boolean values boils down to doing a bitwise OR on the two
    // If the input bit is set, we update both the accumulator value and its bit
    //    -- the accumulator is OR-ed with the value of the input bit
    //    -- the accumulator bit is OR-ed with 1 (since the input is valid)
    // If the input bit is not set, we update neither the accumulator nor its bit
    //    -- the accumulator is OR-ed with a 0 (thus remaining unchanged)
    //    -- the accumulator bit is OR-ed with 0 (thus remaining unchanged)
    // Thus, the logical function for updating the accumulator is: oldVal OR (inputBit AND inputValue)
    // Thus, the logical function for updating the accumulator is: oldBitVal OR inputBit
    // Because the operations are all done in a word length (and not on an individual bit), the AND value for
    // updating the accumulator must have all its other bits set to 1
    final int inputBitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> BITS_PER_BYTE_SHIFT))) >>> (incomingIndex & (BITS_PER_BYTE - 1))) & 1;
    final int inputVal = (PlatformDependent.getByte(incomingValue + ((incomingIndex >>> BITS_PER_BYTE_SHIFT))) >>> (incomingIndex & (BITS_PER_BYTE - 1))) & 1;
    final int maxBitUpdateVal = inputBitVal << (chunkOffset & 31);
    int minUpdateVal = (inputBitVal & inputVal) << (chunkOffset & 31);
    final long maxAddr = valueAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    PlatformDependent.putInt(maxAddr, PlatformDependent.getInt(maxAddr) | minUpdateVal);
    PlatformDependent.putInt(maxBitUpdateAddr, PlatformDependent.getInt(maxBitUpdateAddr) | maxBitUpdateVal);
  }
}
 
Example 18
Source File: MaxAccumulators.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxMemAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxMemAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    // no point continuing.
    if (bitVal == 0) {
      continue;
    }
    /* get the corresponding data from input vector -- source data for accumulation */
    long addressOfInput = incomingValue + (incomingIndex * WIDTH_INPUT);
    long newValLow = PlatformDependent.getLong(addressOfInput);
    long newValHigh = PlatformDependent.getLong(addressOfInput + DecimalUtils.LENGTH_OF_LONG);
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long maxAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    /* store the accumulated values(new max or existing) at the target location of accumulation vector */
    long curValLow = PlatformDependent.getLong(maxAddr);
    long curValHigh = PlatformDependent.getLong(maxAddr + DecimalUtils.LENGTH_OF_LONG);
    int compare = DecimalUtils.compareDecimalsAsTwoLongs(newValHigh, newValLow, curValHigh,
      curValLow);

    if (compare > 0) {
      /* store the accumulated values(new max or existing) at the target location of
      accumulation vector */
      PlatformDependent.putLong(maxAddr, newValLow);
      PlatformDependent.putLong(maxAddr + DecimalUtils.LENGTH_OF_LONG, newValHigh);
      PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
    }
  }
}
 
Example 19
Source File: MinAccumulators.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
public void accumulate(final long memoryAddr, final int count,
                       final int bitsInChunk, final int chunkOffsetMask) {
  final long maxMemAddr = memoryAddr + count * PARTITIONINDEX_HTORDINAL_WIDTH;
  FieldVector inputVector = getInput();
  final long incomingBit = inputVector.getValidityBufferAddress();
  final long incomingValue = inputVector.getDataBufferAddress();
  final long[] bitAddresses = this.bitAddresses;
  final long[] valueAddresses = this.valueAddresses;
  final int maxValuesPerBatch = super.maxValuesPerBatch;

  for (long partitionAndOrdinalAddr = memoryAddr; partitionAndOrdinalAddr < maxMemAddr; partitionAndOrdinalAddr += PARTITIONINDEX_HTORDINAL_WIDTH) {
    /* get the hash table ordinal */
    final int tableIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + HTORDINAL_OFFSET);
    /* get the index of data in input vector */
    final int incomingIndex = PlatformDependent.getInt(partitionAndOrdinalAddr + KEYINDEX_OFFSET);
    /* get the corresponding data from input vector -- source data for accumulation */
    final long newVal = PlatformDependent.getLong(incomingValue + (incomingIndex * WIDTH_INPUT));
    final int bitVal = (PlatformDependent.getByte(incomingBit + ((incomingIndex >>> 3))) >>> (incomingIndex & 7)) & 1;
    /* get the hash table batch index */
    final int chunkIndex = tableIndex >>> bitsInChunk;
    final int chunkOffset = tableIndex & chunkOffsetMask;
    /* get the target addresses of accumulation vector */
    final long minAddr = valueAddresses[chunkIndex] + (chunkOffset) * WIDTH_ACCUMULATOR;
    final long bitUpdateAddr = bitAddresses[chunkIndex] + ((chunkOffset >>> 5) * 4);
    final int bitUpdateVal = bitVal << (chunkOffset & 31);
    // first 4 bytes are the number of days (in little endian, that's the bottom 32 bits)
    // second 4 bytes are the number of milliseconds (in little endian, that's the top 32 bits)
    final int newDays = (int) newVal;
    final int newMillis = (int)(newVal >>> 32);
    // To compare the pairs of day/milli, we swap them, with days getting the most significant bits
    // The incoming value is updated to either be MAX (if incoming is null), or keep as is (if the value is not null)
    final long newSwappedVal = ((((long)newDays) << 32) | newMillis) * bitVal + Long.MAX_VALUE * (bitVal ^ 1);
    final long minVal = PlatformDependent.getLong(minAddr);
    final int minDays = (int) minVal;
    final int minMillis = (int)(minVal >>> 32);
    final long minSwappedVal = (((long)minDays) << 32) | minMillis;
    /* store the accumulated values(new min or existing) at the target location of accumulation vector */
    PlatformDependent.putLong(minAddr, (minSwappedVal < newSwappedVal) ? minVal : newVal);
    PlatformDependent.putInt(bitUpdateAddr, PlatformDependent.getInt(bitUpdateAddr) | bitUpdateVal);
  }
}
 
Example 20
Source File: BaseSingleAccumulator.java    From dremio-oss with Apache License 2.0 3 votes vote down vote up
/**
 * Take the accumulator vector (the vector that stores computed values)
 * for a particular batch (identified by batchIndex) and output its contents.
 * Output is done by transferring the contents from accumulator vector
 * to its counterpart in outgoing container. As part of transfer,
 * the memory ownership (along with data) is transferred from source
 * vector's allocator to target vector's allocator and source vector's
 * memory is released.
 *
 * While the transfer is good as it essentially
 * avoids copy, we still want the memory associated with
 * allocator of source vector because of post-spill processing
 * where this accumulator vector will still continue to store
 * the computed values as we start treating spilled batches
 * as new input into the operator.
 *
 * This is why we need to immediately allocate
 * the accumulator vector after transfer is done. However we do this
 * for a singe batch only as once we are done outputting a partition,
 * we anyway get rid of all but 1 batch.
 *
 * @param batchIndex batch to output
 */
@Override
public void output(final int batchIndex) {
  final FieldVector accumulationVector = accumulators[batchIndex];
  final TransferPair transferPair = accumulationVector.makeTransferPair(transferVector);
  transferPair.transfer();
  if (batchIndex == 0) {
    ((FixedWidthVector) accumulationVector).allocateNew(maxValuesPerBatch);
    accumulationVector.setValueCount(0);
    initialize(accumulationVector);
    bitAddresses[batchIndex] = accumulationVector.getValidityBufferAddress();
    valueAddresses[batchIndex] = accumulationVector.getDataBufferAddress();
  }
}