Java Code Examples for org.apache.parquet.bytes.BytesInput

The following examples show how to use org.apache.parquet.bytes.BytesInput. They are extracted from open source projects; where known, the source project, source file, and license are noted above each example.
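BytesInput itself is a lazy holder for a sequence of bytes that is meant to be consumed once: factory methods such as BytesInput.from(byte[]), BytesInput.fromInt(int), and BytesInput.concat(BytesInput...) defer any copying until the bytes are consumed through size(), toByteArray(), writeAllTo(OutputStream), or toInputStream(). Before the project examples, here is a minimal standalone sketch of that round trip (the class name BytesInputRoundTrip is ours, not from any project below):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.parquet.bytes.BytesInput;

public class BytesInputRoundTrip {
  public static void main(String[] args) throws IOException {
    byte[] payload = {1, 2, 3, 4};
    // concat is lazy: the inputs are only stitched together when consumed
    BytesInput combined = BytesInput.concat(
        BytesInput.fromInt(payload.length), // 4-byte little-endian length prefix
        BytesInput.from(payload));
    System.out.println(combined.size()); // 8: prefix + payload
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    combined.writeAllTo(out); // consume once: writes all 8 bytes
  }
}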
Example 1
Source Project: iceberg   Source File: BasePageIterator.java   License: Apache License 2.0
protected void initFromPage(DataPageV1 initPage) {
  this.triplesCount = initPage.getValueCount();
  ValuesReader rlReader = initPage.getRlEncoding().getValuesReader(desc, ValuesType.REPETITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(rlReader);
  try {
    BytesInput bytes = initPage.getBytes();
    LOG.debug("page size {} bytes and {} records", bytes.size(), triplesCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(triplesCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    initDefinitionLevelsReader(initPage, desc, in, triplesCount);
    LOG.debug("reading data at {}", in.position());
    initDataReader(initPage.getValueEncoding(), in, initPage.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + initPage + " in col " + desc, e);
  }
}
 
Example 2
Source Project: iceberg   Source File: PageIterator.java   License: Apache License 2.0
private void initFromPage(DataPageV1 page) {
  this.triplesCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(desc, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(desc, DEFINITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(rlReader);
  this.definitionLevels = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", bytes.size(), triplesCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(triplesCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(triplesCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
  }
}
 
Example 3
Source Project: dremio-oss   Source File: ParquetRecordWriter.java   License: Apache License 2.0
@SuppressWarnings("deprecation")
private static BytesCompressor toDeprecatedBytesCompressor(final BytesInputCompressor compressor) {
  return new BytesCompressor() {
    @Override
    public BytesInput compress(BytesInput bytes) throws IOException {
      return compressor.compress(bytes);
    }

    @Override
    public CompressionCodecName getCodecName() {
      return compressor.getCodecName();
    }

    @Override
    public void release() {
      compressor.release();
    }
  };
}
 
Example 4
Source Project: datacollector   Source File: AvroParquetConvertCreator.java   License: Apache License 2.0
@Override
protected void addNecessaryJarsToJob(Configuration conf) {
  MapreduceUtils.addJarsToJob(conf,
      SemanticVersion.class,
      ParquetWriter.class,
      AvroParquetWriter.class,
      AvroParquetWriterBuilder190Int96.class,
      AvroSchemaConverter190Int96Avro18.class,
      FsInput.class,
      CompressionCodec.class,
      ParquetProperties.class,
      BytesInput.class,
      AvroToParquetConverterUtil.class,
      AvroLogicalTypeSupport.class
  );
}
 
Example 5
Source Project: flink   Source File: AbstractColumnReader.java   License: Apache License 2.0
private void readPageV1(DataPageV1 page) throws IOException {
	this.pageValueCount = page.getValueCount();
	ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

	// Initialize the decoders.
	if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
		throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
	}
	int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
	this.runLenDecoder = new RunLengthDecoder(bitWidth);
	try {
		BytesInput bytes = page.getBytes();
		ByteBufferInputStream in = bytes.toInputStream();
		rlReader.initFromPage(pageValueCount, in);
		this.runLenDecoder.initFromStream(pageValueCount, in);
		prepareNewPage(page.getValueEncoding(), in);
	} catch (IOException e) {
		throw new IOException("could not read page " + page + " in col " + descriptor, e);
	}
}
 
Example 6
Source Project: parquet-mr   Source File: ByteBasedBitPackingEncoder.java   License: Apache License 2.0
/**
 * writes an int using the requested number of bits.
 * accepts only values less than 2^bitWidth
 * @param value the value to write
 * @throws IOException if there is an exception while writing
 */
public void writeInt(int value) throws IOException {
  input[inputSize] = value;
  ++ inputSize;
  if (inputSize == VALUES_WRITTEN_AT_A_TIME) {
    pack();
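    // the packed slab is full: wrap it as a BytesInput and start a fresh slab (slab size doubles up to a cap)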
    if (packedPosition == slabSize) {
      slabs.add(BytesInput.from(packed));
      totalFullSlabSize += slabSize;
      if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
        slabSize *= 2;
      }
      initPackedSlab();
    }
  }
}
 
Example 7
public BytesInput toBytes() throws IOException {
  Preconditions.checkArgument(!toBytesCalled,
      "You cannot call toBytes() more than once without calling reset()");

  // write anything that is buffered / queued up for an rle-run
  if (repeatCount >= 8) {
    writeRleRun();
  } else if (numBufferedValues > 0) {
    for (int i = numBufferedValues; i < 8; i++) {
      bufferedValues[i] = 0;
    }
    writeOrAppendBitPackedRun();
    endPreviousBitPackedRun();
  } else {
    endPreviousBitPackedRun();
  }

  toBytesCalled = true;
  return BytesInput.from(baos);
}
 
Example 8
Source Project: parquet-mr   Source File: DataPageV2.java   License: Apache License 2.0
public DataPageV2(
    int rowCount, int nullCount, int valueCount,
    BytesInput repetitionLevels, BytesInput definitionLevels,
    Encoding dataEncoding, BytesInput data,
    int uncompressedSize,
    Statistics<?> statistics,
    boolean isCompressed) {
  super(Math.toIntExact(repetitionLevels.size() + definitionLevels.size() + data.size()), uncompressedSize, valueCount);
  this.rowCount = rowCount;
  this.nullCount = nullCount;
  this.repetitionLevels = repetitionLevels;
  this.definitionLevels = definitionLevels;
  this.dataEncoding = dataEncoding;
  this.data = data;
  this.statistics = statistics;
  this.isCompressed = isCompressed;
}
 
Example 9
Source Project: parquet-mr   Source File: DataPageV2.java   License: Apache License 2.0
private DataPageV2(
    int rowCount, int nullCount, int valueCount, long firstRowIndex,
    BytesInput repetitionLevels, BytesInput definitionLevels,
    Encoding dataEncoding, BytesInput data,
    int uncompressedSize,
    Statistics<?> statistics,
    boolean isCompressed) {
  super(Math.toIntExact(repetitionLevels.size() + definitionLevels.size() + data.size()), uncompressedSize,
      valueCount, firstRowIndex);
  this.rowCount = rowCount;
  this.nullCount = nullCount;
  this.repetitionLevels = repetitionLevels;
  this.definitionLevels = definitionLevels;
  this.dataEncoding = dataEncoding;
  this.data = data;
  this.statistics = statistics;
  this.isCompressed = isCompressed;
}
 
Example 10
Source Project: parquet-mr   Source File: TestDictionary.java   License: Apache License 2.0
@Test
public void testBinaryDictionary() throws IOException {
  int COUNT = 100;
  ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(200, 10000);
  writeRepeated(COUNT, cw, "a");
  BytesInput bytes1 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY);
  writeRepeated(COUNT, cw, "b");
  BytesInput bytes2 = getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY);
  // now we will fall back
  writeDistinct(COUNT, cw, "c");
  BytesInput bytes3 = getBytesAndCheckEncoding(cw, PLAIN);

  DictionaryValuesReader cr = initDicReader(cw, BINARY);
  checkRepeated(COUNT, bytes1, cr, "a");
  checkRepeated(COUNT, bytes2, cr, "b");
  BinaryPlainValuesReader cr2 = new BinaryPlainValuesReader();
  checkDistinct(COUNT, bytes3, cr2, "c");
}
 
Example 11
Source Project: Bats   Source File: ParquetColumnChunkPageWriteStore.java   License: Apache License 2.0
@Override
public void writePage(BytesInput bytes,
                      int valueCount,
                      Statistics statistics,
                      Encoding rlEncoding,
                      Encoding dlEncoding,
                      Encoding valuesEncoding) throws IOException {
  long uncompressedSize = bytes.size();
  // Parquet library creates bad metadata if the uncompressed or compressed size of a page exceeds Integer.MAX_VALUE
  if (uncompressedSize > Integer.MAX_VALUE) {
    throw new ParquetEncodingException(
        "Cannot write page larger than Integer.MAX_VALUE bytes: " +
            uncompressedSize);
  }
  BytesInput compressedBytes = compressor.compress(bytes);
  long compressedSize = compressedBytes.size();
  if (compressedSize > Integer.MAX_VALUE) {
    throw new ParquetEncodingException(
        "Cannot write compressed page larger than Integer.MAX_VALUE bytes: "
            + compressedSize);
  }
  parquetMetadataConverter.writeDataPageHeader(
      (int)uncompressedSize,
      (int)compressedSize,
      valueCount,
      statistics,
      rlEncoding,
      dlEncoding,
      valuesEncoding,
      buf);
  this.uncompressedLength += uncompressedSize;
  this.compressedLength += compressedSize;
  this.totalValueCount += valueCount;
  this.pageCount += 1;
  this.totalStatistics.mergeStatistics(statistics);
  compressedBytes.writeAllTo(buf);
  rlEncodings.add(rlEncoding);
  dlEncodings.add(dlEncoding);
  dataEncodings.add(valuesEncoding);
}
 
Example 12
Source Project: Bats   Source File: ParquetColumnChunkPageWriteStore.java   License: Apache License 2.0
@Override
public void writePageV2(int rowCount,
                        int nullCount,
                        int valueCount,
                        BytesInput repetitionLevels,
                        BytesInput definitionLevels,
                        Encoding dataEncoding,
                        BytesInput data,
                        Statistics<?> statistics) throws IOException {
  int rlByteLength = toIntWithCheck(repetitionLevels.size());
  int dlByteLength = toIntWithCheck(definitionLevels.size());
  int uncompressedSize = toIntWithCheck(
      data.size() + repetitionLevels.size() + definitionLevels.size()
  );
  BytesInput compressedData = compressor.compress(data);
  int compressedSize = toIntWithCheck(
      compressedData.size() + repetitionLevels.size() + definitionLevels.size()
  );
  parquetMetadataConverter.writeDataPageV2Header(
      uncompressedSize, compressedSize,
      valueCount, nullCount, rowCount,
      statistics,
      dataEncoding,
      rlByteLength,
      dlByteLength,
      buf);
  this.uncompressedLength += uncompressedSize;
  this.compressedLength += compressedSize;
  this.totalValueCount += valueCount;
  this.pageCount += 1;
  this.totalStatistics.mergeStatistics(statistics);

  repetitionLevels.writeAllTo(buf); // v2 page body: repetition levels, then definition levels, then data
  definitionLevels.writeAllTo(buf);
  compressedData.writeAllTo(buf);

  dataEncodings.add(dataEncoding);
}
 
Example 13
Source Project: Bats   Source File: ParquetColumnChunkPageWriteStore.java   License: Apache License 2.0
/**
 * Writes a number of pages within the corresponding column chunk
 * @param writer the parquet file writer
 * @throws IOException if the file cannot be created
 */
public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
  writer.startColumn(path, totalValueCount, compressor.getCodecName());
  if (dictionaryPage != null) {
    writer.writeDictionaryPage(dictionaryPage);
    // tracking the dictionary encoding is handled in writeDictionaryPage
  }
  writer.writeDataPages(BytesInput.from(buf), uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings);
  writer.endColumn();
  logger.debug(
      String.format(
          "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
          buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, Sets.newHashSet(dataEncodings))
          + (dictionaryPage != null ? String.format(
          ", dic { %,d entries, %,dB raw, %,dB comp}",
          dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize())
          : ""));
  rlEncodings.clear();
  dlEncodings.clear();
  dataEncodings.clear();
  pageCount = 0;
}
 
Example 14
Source Project: Bats   Source File: ParquetColumnChunkPageWriteStore.java   License: Apache License 2.0
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  int uncompressedSize = (int)dictionaryBytes.size();
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
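  // BytesInput.copy materializes compressedBytes into a fresh array, so the stored page stays valid after the compressor reuses its buffers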
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
 
Example 15
Source Project: presto   Source File: ParquetCompressor.java   License: Apache License 2.0
@Override
public ParquetDataOutput compress(BytesInput bytesInput)
        throws IOException
{
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    try (GZIPOutputStream outputStream = new GZIPOutputStream(byteArrayOutputStream)) {
        outputStream.write(bytesInput.toByteArray(), 0, toIntExact(bytesInput.size()));
    }
    byte[] bytes = byteArrayOutputStream.toByteArray();
    return createDataOutput(Slices.wrappedBuffer(bytes, 0, bytes.length));
}
 
Example 16
Source Project: presto   Source File: ParquetCompressor.java   License: Apache License 2.0
@Override
public ParquetDataOutput compress(BytesInput bytesInput)
        throws IOException
{
    int minCompressionBufferSize = compressor.maxCompressedLength(toIntExact(bytesInput.size()));
    byte[] compressionBuffer = new byte[minCompressionBufferSize];
    byte[] bytes = bytesInput.toByteArray();
    // TODO compressedDataSize > bytes.length?
    int compressedDataSize = compressor.compress(bytes, 0, bytes.length, compressionBuffer, 0, compressionBuffer.length);
    return createDataOutput(Slices.wrappedBuffer(compressionBuffer, 0, compressedDataSize));
}
 
Example 17
Source Project: parquet-mr   Source File: ParquetFileWriter.java   License: Apache License 2.0
/**
 * Writes a single page
 * @param valueCount count of values
 * @param uncompressedPageSize the size of the data once uncompressed
 * @param bytes the compressed data for the page without header
 * @param statistics the statistics of the page
 * @param rowCount the number of rows in the page
 * @param rlEncoding encoding of the repetition level
 * @param dlEncoding encoding of the definition level
 * @param valuesEncoding encoding of values
 * @throws IOException if any I/O error occurs while writing the file
 */
public void writeDataPage(
    int valueCount, int uncompressedPageSize,
    BytesInput bytes,
    Statistics statistics,
    long rowCount,
    Encoding rlEncoding,
    Encoding dlEncoding,
    Encoding valuesEncoding) throws IOException {
  long beforeHeader = out.getPos();
  innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);

  offsetIndexBuilder.add((int) (out.getPos() - beforeHeader), rowCount);
}
 
Example 18
Source Project: presto   Source File: PrimitiveColumnWriter.java   License: Apache License 2.0
private List<ParquetDataOutput> getDataStreams()
        throws IOException
{
    List<ParquetDataOutput> dictPage = new ArrayList<>();
    if (currentPageRows > 0) {
        flushCurrentPageToBuffer();
    }
    // write dict page if possible
    DictionaryPage dictionaryPage = primitiveValueWriter.toDictPageAndClose();
    if (dictionaryPage != null) {
        BytesInput pageBytes = copy(dictionaryPage.getBytes());
        long uncompressedSize = dictionaryPage.getUncompressedSize();

        ParquetDataOutput pageData = createDataOutput(pageBytes);
        if (compressor != null) {
            pageData = compressor.compress(pageBytes);
        }
        long compressedSize = pageData.size();

        ByteArrayOutputStream dictStream = new ByteArrayOutputStream();
        parquetMetadataConverter.writeDictionaryPageHeader(toIntExact(uncompressedSize),
                toIntExact(compressedSize),
                dictionaryPage.getDictionarySize(),
                dictionaryPage.getEncoding(),
                dictStream);
        ParquetDataOutput pageHeader = createDataOutput(Slices.wrappedBuffer(dictStream.toByteArray()));
        dictPage.add(pageHeader);
        dictPage.add(pageData);
        totalCompressedSize += pageHeader.size() + compressedSize;
        totalUnCompressedSize += pageHeader.size() + uncompressedSize;

        primitiveValueWriter.resetDictionary();
    }
    getDataStreamsCalled = true;

    return ImmutableList.<ParquetDataOutput>builder()
            .addAll(dictPage)
            .addAll(pageBuffer)
            .build();
}
 
Example 19
Source Project: iceberg   Source File: BasePageIterator.java   License: Apache License 2.0
IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new PageIterator.NullIntIterator();
    }
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            bytes.toInputStream()));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + desc, e);
  }
}
 
Example 20
Source Project: iceberg   Source File: PageIterator.java   License: Apache License 2.0
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    return new RLEIntIterator(
        new RunLengthBitPackingHybridDecoder(
            BytesUtils.getWidthFromMaxInt(maxLevel),
            bytes.toInputStream()));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + desc, e);
  }
}
 
Example 21
Source Project: parquet-mr   Source File: FileEncodingsIT.java   License: Apache License 2.0
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict == null) {
    return null;
  }
  try {
    return new DictionaryPage(
        BytesInput.from(dict.getBytes().toByteArray()),
        dict.getDictionarySize(), dict.getEncoding());
  } catch (IOException e) {
    throw new ParquetDecodingException("Cannot read dictionary", e);
  }
}
 
Example 22
Source Project: parquet-mr   Source File: ByteBasedBitPackingEncoder.java   License: Apache License 2.0
/**
 * @return the bytes representing the packed values
 * @throws IOException if there is an exception while creating the BytesInput
 */
public BytesInput toBytes() throws IOException {
  int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);

  LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
  if (inputSize > 0) {
    for (int i = inputSize; i < input.length; i++) {
      input[i] = 0;
    }
    pack();
  }
  return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
}
 
Example 23
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java   License: Apache License 2.0
@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);

  RunLengthBitPackingHybridEncoder encoder =
      new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator);
  encoders.add(encoder);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    LOG.debug("rle encoded bytes {}", rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
 
Example 24
@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  LOG.debug("writing a buffer of size {}", arrayOut.size());
  return BytesInput.concat(lengthWriter.getBytes(), BytesInput.from(arrayOut));
}
 
Example 25
@Override
public BytesInput getBytes() {
  try {
    // prepend the length of the column
    BytesInput rle = encoder.toBytes();
    return BytesInput.concat(BytesInput.fromInt(Math.toIntExact(rle.size())), rle);
  } catch (IOException e) {
    throw new ParquetEncodingException(e);
  }
}
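The 4-byte length prefix written above is little-endian (that is what BytesInput.fromInt emits), and a reader strips it off before decoding the run. A minimal reader-side sketch, assuming parquet-mr's BytesUtils, ByteBufferInputStream, and RunLengthBitPackingHybridDecoder are available (the method name newLevelDecoder is ours, not from any project here):

// Sketch of the reader-side counterpart to the length-prefixed getBytes() above.
private RunLengthBitPackingHybridDecoder newLevelDecoder(int bitWidth, BytesInput pageBytes) throws IOException {
  ByteBufferInputStream in = pageBytes.toInputStream();
  int length = BytesUtils.readIntLittleEndian(in); // the 4-byte prefix written by BytesInput.fromInt
  return new RunLengthBitPackingHybridDecoder(bitWidth, in.sliceStream(length)); // decoder sees only the RLE run
}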
 
Example 26
/**
 * getBytes triggers a flush of the block buffer. DO NOT write after getBytes() is called without calling reset().
 *
 * @return a BytesInput that contains the encoded page data
 */
@Override
public BytesInput getBytes() {
  // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
  if (deltaValuesToFlush != 0) {
    flushBlockBuffer();
  }
  return BytesInput.concat(
          config.toBytesInput(),
          BytesInput.fromUnsignedVarInt(totalValueCount),
          BytesInput.fromZigZagVarLong(firstValue),
          BytesInput.from(baos));
}
 
Example 27
/**
 * getBytes triggers a flush of the block buffer. DO NOT write after getBytes() is called without calling reset().
 *
 * @return a BytesInput that contains the encoded page data
 */
@Override
public BytesInput getBytes() {
  // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
  if (deltaValuesToFlush != 0) {
    flushBlockBuffer();
  }
  return BytesInput.concat(
          config.toBytesInput(),
          BytesInput.fromUnsignedVarInt(totalValueCount),
          BytesInput.fromZigZagVarInt(firstValue),
          BytesInput.from(baos));
}
 
Example 28
Source Project: parquet-mr   Source File: CompressionConverter.java   License: Apache License 2.0
private byte[] translatePageLoad(TransParquetFileReader reader, boolean isCompressed, CompressionCodecFactory.BytesInputCompressor compressor,
                                 CompressionCodecFactory.BytesInputDecompressor decompressor, int payloadLength, int rawDataLength) throws IOException {
  BytesInput data = readBlock(payloadLength, reader);
  if (isCompressed) {
    data = decompressor.decompress(data, rawDataLength);
  }
  BytesInput newCompressedData = compressor.compress(data);
  return newCompressedData.toByteArray();
}
 
Example 29
Source Project: parquet-mr   Source File: ByteBitPackingValuesWriter.java   License: Apache License 2.0
@Override
public BytesInput getBytes() {
  try {
    return encoder.toBytes();
  } catch (IOException e) {
    throw new ParquetEncodingException(e);
  }
}
 
Example 30
Source Project: parquet-mr   Source File: FallbackValuesWriter.java   License: Apache License 2.0
@Override
public BytesInput getBytes() {
  if (!fellBackAlready && firstPage) {
    // we use the first page to decide if we're going to use this encoding
    BytesInput bytes = initialWriter.getBytes();
    if (!initialWriter.isCompressionSatisfying(rawDataByteSize, bytes.size())) {
      fallBack();
    } else {
      return bytes;
    }
  }
  return currentWriter.getBytes();
}